diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index be088de898c862d75579abeb8be606738d77eee1..4711ce83da7d530a9b224bf7c3d077700433fdab 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -7,16 +7,20 @@ set(XPU_PROJECT "extern_xpu") set(XPU_API_LIB_NAME "libxpuapi.so") set(XPU_RT_LIB_NAME "libxpurt.so") +set(XPU_BASE_DATE "20230114") +set(XPU_XCCL_BASE_VERSION "1.0.7") + if(NOT DEFINED XPU_BASE_URL) set(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev") - set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20230110") + set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/${XPU_BASE_DATE}") else() set(XPU_BASE_URL "${XPU_BASE_URL}") endif() set(XPU_XCCL_BASE_URL - "https://klx-sdk-release-public.su.bcebos.com/xccl/release/1.0.6") + "https://klx-sdk-release-public.su.bcebos.com/xccl/release/${XPU_XCCL_BASE_VERSION}" +) if(WITH_AARCH64) set(XPU_XRE_DIR_NAME "xre-kylin_aarch64") diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index 602f966cf8ebc2c88d87903b956faa1bc40107e5..f181c60e6e98c242926753c9c15d70c2a466840e 100755 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -321,8 +321,7 @@ endif() if(WITH_GPU) if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0 - OR (${CMAKE_CUDA_COMPILER_VERSION} GREATER_EQUAL 11.6 - AND ${CMAKE_CUDA_COMPILER_VERSION} LESS 11.8)) + OR (WIN32 AND ${CMAKE_CUDA_COMPILER_VERSION} GREATER_EQUAL 11.6)) include(external/cub) # download cub list(APPEND third_party_deps extern_cub) endif() diff --git a/paddle/fluid/distributed/CMakeLists.txt b/paddle/fluid/distributed/CMakeLists.txt index 97e44772962bbaf8442d132dc4e6a50d91e02f18..40400c7de38561cc3b4bfb5a84407882b5879df1 100755 --- a/paddle/fluid/distributed/CMakeLists.txt +++ b/paddle/fluid/distributed/CMakeLists.txt @@ -2,18 +2,26 @@ add_subdirectory(auto_parallel) add_subdirectory(collective) add_subdirectory(fleet_executor) if(WITH_PYTHON) + py_proto_compile(pslib_py_proto SRCS ps.proto) py_proto_compile(ps_py_proto SRCS the_one_ps.proto) add_custom_target( ps_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto) add_dependencies(ps_py_proto ps_py_proto_init) + set(PSLIB_PROTO_DSTPATH + "${PADDLE_SOURCE_DIR}/python/paddle/fluid/incubate/fleet/parameter_server/pslib/" + ) if(NOT WIN32) add_custom_command( TARGET ps_py_proto POST_BUILD COMMAND mv the_one_ps_pb2.py ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/) + add_custom_command( + TARGET pslib_py_proto + POST_BUILD + COMMAND mv ps_pb2.py "${PSLIB_PROTO_DSTPATH}") else() string( REPLACE "/" "\\" fleet_proto_dstpath @@ -25,7 +33,15 @@ if(WITH_PYTHON) COMMENT "Copy generated python the_one_ps_pb2 into directory ${fleet_proto_dstpath}." 
) + string(REPLACE "/" "\\" PSLIB_PROTO_DSTPATH "${PSLIB_PROTO_DSTPATH}") + add_custom_command( + TARGET pslib_py_proto + POST_BUILD + COMMAND copy /Y ps_pb2.py ${PSLIB_PROTO_DSTPATH}) endif() + message( + STATUS + "Copy generated python ps_pb2.py into directory ${PSLIB_PROTO_DSTPATH}") endif() if(WITH_RPC) diff --git a/paddle/fluid/distributed/collective/process_group_bkcl.cc b/paddle/fluid/distributed/collective/process_group_bkcl.cc index de4a84bff4808f96ba5fecb3bb7f1fece54c3870..0bfa04932c3f1414ac5a3073cf150c78a728965d 100644 --- a/paddle/fluid/distributed/collective/process_group_bkcl.cc +++ b/paddle/fluid/distributed/collective/process_group_bkcl.cc @@ -352,41 +352,17 @@ std::shared_ptr ProcessGroupBKCL::Reduce( const phi::DenseTensor& input, BKCLContext_t comm, const XPUStream& stream) { - phi::DenseTensor output_t; - paddle::framework::TensorCopy(*output, platform::XPUPlace(), &output_t); - const auto& place = input.place(); - auto* calc_ctx = static_cast( - platform::DeviceContextPool::Instance().Get(place)); - switch (input.dtype()) { - case phi::DataType::FLOAT32: - calc_ctx->template Alloc(&output_t); - break; - case phi::DataType::FLOAT16: - calc_ctx->template Alloc(&output_t); - break; - case phi::DataType::INT32: - calc_ctx->template Alloc(&output_t); - break; - default: - VLOG(0) << "Error: type " << input.dtype() << " not supported for " - << GetBackendName(); - break; - } - int ret = - bkcl_all_reduce(comm, - input.data(), - output_t.data(), - input.numel(), - platform::ToBKCLDataType( - framework::TransToProtoVarType(input.type())), - ToBKCLRedType(opts.reduce_op), - stream); - if (rank_ == opts.root_rank) { - *output = output_t; - } - return ret; + return bkcl_reduce(comm, + input.data(), + output->data(), + input.numel(), + platform::ToBKCLDataType( + framework::TransToProtoVarType(input.type())), + ToBKCLRedType(opts.reduce_op), + opts.root_rank, + stream); }, - CommType::ALLREDUCE, + CommType::REDUCE, sync_op, use_calc_stream); } diff --git a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt index cc5ed287e954f67d9c2877a413333d72a4bde534..9cf1cdde223488a5ae5a56676b1d97bdeac93b5c 100755 --- a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt +++ b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt @@ -36,6 +36,7 @@ cc_library( interceptor.cc compute_interceptor.cc amplifier_interceptor.cc + cond_interceptor.cc source_interceptor.cc sink_interceptor.cc message_service.cc @@ -66,6 +67,8 @@ if(WITH_DISTRIBUTE) set_source_files_properties( amplifier_interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + cond_interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties( source_interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties( diff --git a/paddle/fluid/distributed/fleet_executor/carrier.cc b/paddle/fluid/distributed/fleet_executor/carrier.cc index 3449c87998a9dba21824e854afdb7216cb818164..094afff577a9e851640cfe947f72656d8395e556 100644 --- a/paddle/fluid/distributed/fleet_executor/carrier.cc +++ b/paddle/fluid/distributed/fleet_executor/carrier.cc @@ -33,6 +33,7 @@ USE_INTERCEPTOR(Source); USE_INTERCEPTOR(Compute); USE_INTERCEPTOR(Amplifier); USE_INTERCEPTOR(Sink); +USE_INTERCEPTOR(Cond); void Carrier::Init( int64_t rank, @@ -96,29 +97,30 @@ void Carrier::CopyParameters( int microbatch_id, const framework::ProgramDesc& program, const std::vector& inference_root_scope_vars) { 
- auto& global_block = program.Block(0); - std::map inference_root_scope_var_map; for (auto var_name : inference_root_scope_vars) { inference_root_scope_var_map.insert({var_name, 1}); } - for (auto& var : global_block.AllVars()) { - std::string var_name = var->Name(); - bool force_root = inference_root_scope_var_map.find(var_name) != - inference_root_scope_var_map.end(); - if (force_root) { - VLOG(4) << var_name << " will be forced to be created in the root scope."; - } - if ((var->Persistable() || force_root) && microbatch_id == 0) { - auto* ptr = root_scope_->Var(var->Name()); - InitializeVariable(ptr, var->GetType()); - VLOG(5) << "Create persistable var: " << var->Name() - << ", which pointer is " << ptr; - } else if (!var->Persistable()) { - auto* ptr = microbatch_scopes_[microbatch_id]->Var(var->Name()); - VLOG(5) << "Create variable " << var->Name() << " for microbatch " - << microbatch_id << ", which pointer is " << ptr << "."; - InitializeVariable(ptr, var->GetType()); + for (size_t i = 0; i < program.Size(); ++i) { + for (auto& var : program.Block(i).AllVars()) { + std::string var_name = var->Name(); + bool force_root = inference_root_scope_var_map.find(var_name) != + inference_root_scope_var_map.end(); + if (force_root) { + VLOG(4) << var_name + << " will be forced to be created in the root scope."; + } + if ((var->Persistable() || force_root) && microbatch_id == 0) { + auto* ptr = root_scope_->Var(var->Name()); + InitializeVariable(ptr, var->GetType()); + VLOG(5) << "Create persistable var: " << var->Name() + << ", which pointer is " << ptr; + } else if (!var->Persistable()) { + auto* ptr = microbatch_scopes_[microbatch_id]->Var(var->Name()); + VLOG(5) << "Create variable " << var->Name() << " for microbatch " + << microbatch_id << ", which pointer is " << ptr << "."; + InitializeVariable(ptr, var->GetType()); + } } } } diff --git a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc index 5017f81523c8aea31fb8732e001e4af311313d32..9aedaa131400f3bfd6be24953050071e8970a557 100644 --- a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc @@ -125,6 +125,7 @@ void ComputeInterceptor::SendDataReadyToDownStream() { InterceptorMessage ready_msg; ready_msg.set_message_type(DATA_IS_READY); + ready_msg.set_scope_idx(cur_scope_id_); VLOG(3) << "ComputeInterceptor " << interceptor_id_ << " Send data_is_ready msg to " << down_id << " in scope: " << cur_scope_id_; @@ -152,6 +153,7 @@ void ComputeInterceptor::ReplyCompletedToUpStream() { InterceptorMessage reply_msg; reply_msg.set_message_type(DATA_IS_USELESS); + reply_msg.set_scope_idx(cur_scope_id_); Send(up_id, reply_msg); } } diff --git a/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc b/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc new file mode 100644 index 0000000000000000000000000000000000000000..1d82b73fb898c7d2cd81bcd4e60d16dfea56c777 --- /dev/null +++ b/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc @@ -0,0 +1,141 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/distributed/fleet_executor/cond_interceptor.h" +#include "paddle/fluid/distributed/fleet_executor/task_node.h" +#include "paddle/fluid/framework/executor_gc_helper.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/platform/errors.h" +#include "paddle/fluid/platform/place.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/errors.h" + +namespace paddle { +namespace distributed { + +CondInterceptor::CondInterceptor(int64_t interceptor_id, TaskNode* node) + : Interceptor(interceptor_id, node) { + PrepareDeps(); + RegisterMsgHandle([this](const InterceptorMessage& msg) { Run(msg); }); +} + +void CondInterceptor::PrepareDeps() { + auto& upstream = node_->upstream(); + auto& downstream = node_->downstream(); + auto& id_to_dep_type = node_->id_to_dep_type(); + + for (const auto& up : upstream) { + if (id_to_dep_type.at(up.first) == DependType::NORMAL) { + normal_in_id_.insert(up.first); + } + } + + for (const auto& down : downstream) { + if (id_to_dep_type.at(down.first) == DependType::NORMAL) { + normal_out_id_.insert(down.first); + } else if (id_to_dep_type.at(down.first) == DependType::STOP_LOOP) { + stop_loop_id_ = down.first; + } + } +} + +bool CondInterceptor::GetCondResult() { + PADDLE_ENFORCE_LT(cur_scope_id_, + microbatch_scopes_.size(), + platform::errors::InvalidArgument( + "Step out of range. 
There are %ld " + "microbatch_scopes, but received scope index %ld", + microbatch_scopes_.size(), + cur_scope_id_)); + auto* cond_var = + microbatch_scopes_[cur_scope_id_]->FindVar(node_->cond_var()); + PADDLE_ENFORCE(cond_var, + platform::errors::NotFound( + "Condition variable %s does not exist in scope %ld", + node_->cond_var(), + cur_scope_id_)); + const auto& cond_tensor = cond_var->Get<phi::DenseTensor>(); + bool res = false; + if (platform::is_gpu_place(cond_tensor.place())) { +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + phi::DenseTensor cpu_tensor; + framework::TensorCopy(cond_tensor, platform::CPUPlace(), &cpu_tensor); + platform::DeviceContextPool::Instance().Get(cond_tensor.place())->Wait(); + res = cpu_tensor.data<bool>()[0]; +#endif + } else if (platform::is_cpu_place(cond_tensor.place())) { + res = cond_tensor.data<bool>()[0]; + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported device for cond interceptor.")); + } + return res; +} + +void CondInterceptor::SendDataReady(int64_t down_id) { + InterceptorMessage ready_msg; + ready_msg.set_message_type(DATA_IS_READY); + ready_msg.set_scope_idx(cur_scope_id_); + Send(down_id, ready_msg); +} + +void CondInterceptor::ReplyDataIsUseless(int64_t up_id) { + InterceptorMessage ready_msg; + ready_msg.set_message_type(DATA_IS_USELESS); + ready_msg.set_scope_idx(cur_scope_id_); + Send(up_id, ready_msg); +} + +void CondInterceptor::Compute() { + cur_scope_id_ = ready_queue_.front(); + ready_queue_.pop(); + bool cond = GetCondResult(); + VLOG(3) << "Cond interceptor got condition var " << node_->cond_var() + << " with value " << cond; + if (cond) { + VLOG(3) << "Loop again in scope " << cur_scope_id_; + for (auto& down_id : normal_out_id_) { + SendDataReady(down_id); + } + } else { + VLOG(3) << "Finish loop in scope " << cur_scope_id_; + SendDataReady(stop_loop_id_); + } +} + +void CondInterceptor::Run(const InterceptorMessage& msg) { + if (msg.message_type() == DATA_IS_READY) { + ready_queue_.push(msg.scope_idx()); + Compute(); + } else if (msg.message_type() == DATA_IS_USELESS) { + if (node_->id_to_dep_type().at(msg.src_id()) == DependType::STOP_LOOP) { + for (auto& up_id : normal_in_id_) { + ReplyDataIsUseless(up_id); + } + // GC the variables in the while block + int64_t scope_id = msg.scope_idx(); + if (gc_) { + VLOG(3) << "Release vars in while block in scope " << scope_id; + framework::DeleteUnusedTensors(*microbatch_scopes_[scope_id], + node_->while_block_vars(), + gc_.get()); + } + } + } +} + +REGISTER_INTERCEPTOR(Cond, CondInterceptor); + +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/fleet_executor/cond_interceptor.h b/paddle/fluid/distributed/fleet_executor/cond_interceptor.h new file mode 100644 index 0000000000000000000000000000000000000000..81b001135f189ef7e85ee279774b103d7dec7368 --- /dev/null +++ b/paddle/fluid/distributed/fleet_executor/cond_interceptor.h @@ -0,0 +1,51 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include <queue> +#include "paddle/fluid/distributed/fleet_executor/interceptor.h" + +namespace paddle { +namespace distributed { + +/* Condition Interceptor + * This is a special interceptor whose task node holds only one condition op. + * The interceptor has two kinds of downstreams: + * 1. If the condition result is true, it selects the normal downstreams, + * otherwise it selects the stop-loop downstream. + * 2. It is used to implement the while op in a program. + */ +class CondInterceptor final : public Interceptor { + public: + CondInterceptor(int64_t interceptor_id, TaskNode* node); + + private: + void PrepareDeps(); + void Run(const InterceptorMessage& msg); + void Compute(); + bool GetCondResult(); + void SendDataReady(int64_t down_id); + void ReplyDataIsUseless(int64_t up_id); + + std::queue<int64_t> ready_queue_; + int64_t cur_scope_id_; + + std::set<int64_t> normal_in_id_; + std::set<int64_t> normal_out_id_; + int64_t stop_loop_id_; +}; + +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/fleet_executor/fleet_executor.cc b/paddle/fluid/distributed/fleet_executor/fleet_executor.cc index a2d2ecd9bbf106c1ca3c774fc338c8a1eb82fe20..1f397a91746b96035fa420452f06702a43ef2c45 100644 --- a/paddle/fluid/distributed/fleet_executor/fleet_executor.cc +++ b/paddle/fluid/distributed/fleet_executor/fleet_executor.cc @@ -66,12 +66,11 @@ void FleetExecutor::Init( "Fleet executor is inited with empty task node")); // TODO(fleet_exe devs): the unused_vars should be got from run time graph std::vector<std::unique_ptr<framework::OperatorBase>> ops; - for (auto task_node : task_nodes) { - for (auto op : task_node->ops()) { - ops.emplace_back(std::unique_ptr<framework::OperatorBase>(op)); - } + for (const auto& desc : program_desc.Block(0).AllOps()) { + ops.emplace_back(framework::OpRegistry::CreateOp(*desc)); } auto unused_vars = framework::GetUnusedVars(program_desc.Block(0), ops, {}); + // NOTE: For inference, the vars in inference_root_scope_vars // shouldn't be deleted during inf, for that they may be the result of the // inf. If they are GCed, it will cause error during ZeroCopy the result. 
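The next hunk in FleetExecutor::Init collects the variables that live only in the while sub-block (Block 1) and not in the parent block (Block 0), so that only those can be garbage-collected after each loop iteration. A minimal standalone sketch of the std::sort + std::set_difference idiom used there (the variable names below are made up for illustration):

#include <algorithm>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

int main() {
  // Stand-ins for program_desc.Block(0).AllVars() and Block(1).AllVars().
  std::vector<std::string> vars_in_parent = {"x", "cond", "out"};
  std::vector<std::string> vars_in_sub = {"x", "cond", "tmp_0", "tmp_1"};

  // std::set_difference requires both input ranges to be sorted.
  std::sort(vars_in_parent.begin(), vars_in_parent.end());
  std::sort(vars_in_sub.begin(), vars_in_sub.end());

  // Keep the names that appear in the sub-block but not in the parent block.
  std::vector<std::string> while_block_vars;
  std::set_difference(vars_in_sub.begin(), vars_in_sub.end(),
                      vars_in_parent.begin(), vars_in_parent.end(),
                      std::back_inserter(while_block_vars));

  for (const auto& name : while_block_vars) {
    std::cout << name << "\n";  // prints tmp_0 and tmp_1
  }
  return 0;
}

Only these sub-block-local names are handed to TaskNode::SetWhileBlockVars, which is what DeleteUnusedTensors in the cond interceptor later uses to free per-iteration temporaries without touching variables owned by the outer block.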
@@ -107,6 +106,25 @@ void FleetExecutor::Init( std::unordered_map interceptor_id_to_task; for (auto task_node : task_nodes) { task_node->SetUnusedVars(unused_vars); + if (task_node->type() == "Cond") { + std::vector while_block_vars; + std::vector vars_in_parent; + std::vector vars_in_sub; + for (auto& var : program_desc.Block(0).AllVars()) { + vars_in_parent.emplace_back(var->Name()); + } + for (auto& var : program_desc.Block(1).AllVars()) { + vars_in_sub.emplace_back(var->Name()); + } + std::sort(vars_in_parent.begin(), vars_in_parent.end()); + std::sort(vars_in_sub.begin(), vars_in_sub.end()); + std::set_difference(vars_in_sub.begin(), + vars_in_sub.end(), + vars_in_parent.begin(), + vars_in_parent.end(), + std::back_inserter(while_block_vars)); + task_node->SetWhileBlockVars(while_block_vars); + } int64_t interceptor_id = task_node->task_id(); interceptor_id_to_task.emplace(interceptor_id, task_node); } diff --git a/paddle/fluid/distributed/fleet_executor/task_node.cc b/paddle/fluid/distributed/fleet_executor/task_node.cc index 341ffe290a52055143db2729d30dd18582cbb6df..4319ffd368b6c6770d1ff0265b07ca784ee9c153 100644 --- a/paddle/fluid/distributed/fleet_executor/task_node.cc +++ b/paddle/fluid/distributed/fleet_executor/task_node.cc @@ -24,33 +24,14 @@ namespace { using OperatorBase = TaskNode::OperatorBase; } -TaskNode::TaskNode(paddle::framework::ProgramDesc* program, - int64_t rank, - int64_t max_run_times, - int64_t max_slot_nums) - : program_(program), - rank_(rank), - max_run_times_(max_run_times), - max_slot_nums_(max_slot_nums) { - // Should be serially invoked, not thread-safe - // NOTE: when instantiate TaskNode with program, won't init task node - // immediately, since the provided program may be updated later (with - // high probability) by adding_feed_fetch_ops or by RuntimeGraph. - // So, delay the init part to the Init() function. - static int64_t task_node_cnt = 0; - task_id_ = task_node_cnt++; -} - TaskNode::TaskNode(paddle::framework::ProgramDesc* program, int64_t rank, int64_t task_id, - int64_t max_run_times, - int64_t max_slot_nums) + int64_t max_run_times) : program_(program), rank_(rank), task_id_(task_id), - max_run_times_(max_run_times), - max_slot_nums_(max_slot_nums) { + max_run_times_(max_run_times) { // TODO(liyurui): Will be removed when execute program is supported. Init(); } @@ -58,7 +39,6 @@ TaskNode::TaskNode(paddle::framework::ProgramDesc* program, TaskNode::TaskNode(paddle::framework::ProgramDesc* program, int64_t rank) : program_(program), rank_(rank), task_id_(rank) { max_run_times_ = 1; - max_slot_nums_ = 1; LOG(INFO) << "Constructing TaskNode for DistModelInf. 
The TaskNode's id is: " << rank @@ -98,13 +78,11 @@ TaskNode::TaskNode(int32_t role, const std::vector& op_descs, int64_t rank, int64_t task_id, - int64_t max_run_times, - int64_t max_slot_nums) + int64_t max_run_times) : role_(role), rank_(rank), task_id_(task_id), - max_run_times_(max_run_times), - max_slot_nums_(max_slot_nums) { + max_run_times_(max_run_times) { if (op_descs.empty()) { return; } @@ -121,33 +99,35 @@ TaskNode::TaskNode(int32_t role, const std::vector& ops, int64_t rank, int64_t task_id, - int64_t max_run_times, - int64_t max_slot_nums) + int64_t max_run_times) : ops_(ops), role_(role), rank_(rank), task_id_(task_id), - max_run_times_(max_run_times), - max_slot_nums_(max_slot_nums) {} + max_run_times_(max_run_times) {} TaskNode::TaskNode(int32_t role, int64_t rank, int64_t task_id, - int64_t max_run_times, - int64_t max_slot_nums) + int64_t max_run_times) : role_(role), rank_(rank), task_id_(task_id), - max_run_times_(max_run_times), - max_slot_nums_(max_slot_nums) {} + max_run_times_(max_run_times) {} -bool TaskNode::AddUpstreamTask(int64_t task_id, int64_t buff_size) { +bool TaskNode::AddUpstreamTask(int64_t task_id, + int64_t buff_size, + DependType type) { const auto& ret = upstream_.emplace(task_id, buff_size); + id_to_dep_type_.emplace(task_id, type); return ret.second; } -bool TaskNode::AddDownstreamTask(int64_t task_id, int64_t buff_size) { +bool TaskNode::AddDownstreamTask(int64_t task_id, + int64_t buff_size, + DependType type) { const auto& ret = downstream_.emplace(task_id, buff_size); + id_to_dep_type_.emplace(task_id, type); return ret.second; } diff --git a/paddle/fluid/distributed/fleet_executor/task_node.h b/paddle/fluid/distributed/fleet_executor/task_node.h index 8538ac9ff81faccac10f6c3dddd2d8f143268ccf..0f234dd172102edb094c628b2995709676970440 100644 --- a/paddle/fluid/distributed/fleet_executor/task_node.h +++ b/paddle/fluid/distributed/fleet_executor/task_node.h @@ -14,8 +14,10 @@ #pragma once #include +#include #include #include +#include #include #include @@ -29,38 +31,30 @@ class OpDesc; } // namespace framework namespace distributed { +enum class DependType { NORMAL, LOOP, STOP_LOOP }; + class TaskNode final { public: using OperatorBase = paddle::framework::OperatorBase; TaskNode(int64_t rank, int64_t task_id, int64_t max_run_times); - TaskNode(int32_t role, - int64_t rank, - int64_t task_id, - int64_t max_run_times, - int64_t max_slot_nums); + TaskNode(int32_t role, int64_t rank, int64_t task_id, int64_t max_run_times); TaskNode(int32_t role, const std::vector& op_descs, int64_t rank, int64_t task_id, - int64_t max_run_times, - int64_t max_slot_nums); + int64_t max_run_times); TaskNode(int32_t role, const std::vector& ops, int64_t rank, int64_t task_id, - int64_t max_run_times, - int64_t max_slot_nums); - TaskNode(paddle::framework::ProgramDesc* program, - int64_t rank, - int64_t max_run_times, - int64_t max_slot_nums); + int64_t max_run_times); TaskNode(paddle::framework::ProgramDesc* program, int64_t rank); // TODO(liyurui): This will be the only constructor for task node TaskNode(paddle::framework::ProgramDesc* program, int64_t task_id, int64_t rank, - int64_t max_run_times, - int64_t max_slot_nums); + int64_t max_run_times); + ~TaskNode() = default; void SetProgram(paddle::framework::ProgramDesc* program); @@ -69,11 +63,11 @@ class TaskNode final { int64_t task_id() const { return task_id_; } int32_t role() const { return role_; } int64_t max_run_times() const { return max_run_times_; } - int64_t max_slot_nums() const { return 
max_slot_nums_; } int64_t run_per_steps() const { return run_per_steps_; } int64_t run_at_offset() const { return run_at_offset_; } int64_t reply_up_per_steps() const { return reply_up_per_steps_; } int64_t send_down_per_steps() const { return send_down_per_steps_; } + const std::string& cond_var() const { return cond_var_; } const std::unordered_map& upstream() const { return upstream_; } @@ -86,11 +80,20 @@ class TaskNode final { const std::vector>& unique_ops() const { return ops_vec_; } + const std::unordered_map id_to_dep_type() const { + return id_to_dep_type_; + } const std::unordered_map>& unused_vars() const { return unused_vars_; } + const std::vector while_block_vars() const { + return while_block_vars_; + } + void SetCondVarName(const std::string& cond_var_name) { + cond_var_ = cond_var_name; + } void SetRunPerSteps(int64_t value); void SetRunAtOffset(int64_t value); void SetReplyUpPerSteps(int64_t value); @@ -101,10 +104,17 @@ class TaskNode final { unused_vars) { unused_vars_ = unused_vars; } + void SetWhileBlockVars(const std::vector& vars) { + while_block_vars_ = vars; + } // upstream need buffs? - bool AddUpstreamTask(int64_t task_id, int64_t buff_size = 1); - bool AddDownstreamTask(int64_t task_id, int64_t buff_size = 1); + bool AddUpstreamTask(int64_t task_id, + int64_t buff_size = 1, + DependType type = DependType::NORMAL); + bool AddDownstreamTask(int64_t task_id, + int64_t buff_size = 1, + DependType type = DependType::NORMAL); std::string DebugString() const; private: @@ -115,16 +125,20 @@ class TaskNode final { // task_id-->buff_size std::unordered_map upstream_; std::unordered_map downstream_; + // task_id-->type + std::unordered_map id_to_dep_type_; + framework::ProgramDesc* program_; + std::string cond_var_; std::vector> ops_vec_; std::unordered_map> unused_vars_; + std::vector while_block_vars_; int32_t role_; int64_t rank_; int64_t task_id_; int64_t max_run_times_; - int64_t max_slot_nums_; int64_t run_per_steps_{1}; int64_t run_at_offset_{0}; diff --git a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc index de317d843ad79841c8e52896155cd2d3ab547fc9..63d4fa1bf97049cc5654975a58b3af84e50ce8e5 100644 --- a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc @@ -77,9 +77,8 @@ TEST(ComputeInterceptor, Compute) { // FIXME: don't delete, otherwise interceptor will use undefined node TaskNode* source = new TaskNode(0, SOURCE_ID, 2); // rank, task_id, max_run_times - TaskNode* node_a = - new TaskNode(0, ops, 0, 0, 2, 0); // role, ops, rank, task_id - TaskNode* node_b = new TaskNode(0, 0, 1, 2, 0); + TaskNode* node_a = new TaskNode(0, ops, 0, 0, 2); // role, ops, rank, task_id + TaskNode* node_b = new TaskNode(0, 0, 1, 2); TaskNode* sink = new TaskNode(0, SINK_ID, 2); // source->a->b->sink diff --git a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc index e484031161489f4e6cd54403fbd15da0128433e8..618e55ba6ef6a922aceb2d90d15fe8f3a0866e43 100644 --- a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc @@ -37,8 +37,8 @@ TEST(ComputeInterceptor, Compute) { // NOTE: don't delete, otherwise interceptor will use undefined node TaskNode* source = new 
TaskNode(0, SOURCE_ID, 3); // rank, task_id, max_run_times - TaskNode* node_a = new TaskNode(0, 0, 0, 3, 0); - TaskNode* node_b = new TaskNode(0, 0, 1, 3, 0); + TaskNode* node_a = new TaskNode(0, 0, 0, 3); + TaskNode* node_b = new TaskNode(0, 0, 1, 3); TaskNode* sink = new TaskNode(0, SINK_ID, 3); // source->a->b->sink diff --git a/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_long_path_test.cc b/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_long_path_test.cc index 3415e377478d488f02fed0e4c4140760422e6662..3dca7aed1414b387626250fc1b2ffe4f80f54da4 100644 --- a/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_long_path_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_long_path_test.cc @@ -71,12 +71,12 @@ TEST(AmplifierInterceptor, Amplifier) { // NOTE: don't delete, otherwise interceptor will use undefined node TaskNode* source = new TaskNode(0, SOURCE_ID, micro_steps); // rank, task_id, max_run_times - TaskNode* node_a = new TaskNode(0, 0, 0, 1, 0); // role, rank, task_id - TaskNode* node_b = new TaskNode(0, 0, 1, 1, 0); - TaskNode* node_c = new TaskNode(0, 0, 2, 1, 0); - TaskNode* node_d = new TaskNode(0, 0, 3, 1, 0); - TaskNode* node_e = new TaskNode(0, 0, 4, 1, 0); - TaskNode* node_f = new TaskNode(0, 0, 5, 1, 0); + TaskNode* node_a = new TaskNode(0, 0, 0, 1); // role, rank, task_id + TaskNode* node_b = new TaskNode(0, 0, 1, 1); + TaskNode* node_c = new TaskNode(0, 0, 2, 1); + TaskNode* node_d = new TaskNode(0, 0, 3, 1); + TaskNode* node_e = new TaskNode(0, 0, 4, 1); + TaskNode* node_f = new TaskNode(0, 0, 5, 1); TaskNode* sink = new TaskNode(0, SINK_ID, micro_steps); // source->a->b->c->d->e->f->sink diff --git a/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_short_path_test.cc b/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_short_path_test.cc index fdee01fed1a05c03ccee923277fb561daf07d17b..3101ad5f4898eddf3ffca1314defbe06913e61cd 100644 --- a/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_short_path_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_short_path_test.cc @@ -83,11 +83,10 @@ TEST(AmplifierInterceptor, Amplifier) { // NOTE: don't delete, otherwise interceptor will use undefined node TaskNode* source = new TaskNode(0, SOURCE_ID, micro_steps); // rank, task_id, max_run_times - TaskNode* node_a = - new TaskNode(0, 0, 0, micro_steps, 0); // role, rank, task_id - TaskNode* node_b = new TaskNode(0, 0, 1, 3, 0); - TaskNode* node_c = new TaskNode(0, 0, 2, 3, 0); - TaskNode* node_d = new TaskNode(0, 0, 3, micro_steps, 0); + TaskNode* node_a = new TaskNode(0, 0, 0, micro_steps); // role, rank, task_id + TaskNode* node_b = new TaskNode(0, 0, 1, 3); + TaskNode* node_c = new TaskNode(0, 0, 2, 3); + TaskNode* node_d = new TaskNode(0, 0, 3, micro_steps); TaskNode* sink = new TaskNode(0, SINK_ID, micro_steps); // source->a->b->c->d->sink diff --git a/paddle/fluid/distributed/fleet_executor/test/sink_interceptor_test.cc b/paddle/fluid/distributed/fleet_executor/test/sink_interceptor_test.cc index 879d7e9b02941811e8fa8e9d6bd3f0e6f5339851..b2b1d06634bd8296b42e81b874244409e9e9eac5 100644 --- a/paddle/fluid/distributed/fleet_executor/test/sink_interceptor_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/sink_interceptor_test.cc @@ -62,10 +62,9 @@ TEST(SourceInterceptor, Source) { msg_bus->Init(0, {{0, "127.0.0.0:0"}}, ""); // NOTE: don't delete, otherwise interceptor will use undefined node - TaskNode* source = - 
new TaskNode(0, SOURCE_ID, 0, 3, 0); // role, rank, task_id - TaskNode* node_a = new TaskNode(0, 0, 0, 3, 0); // role, rank, task_id - TaskNode* sink = new TaskNode(0, SINK_ID, 0, 3, 0); // role, rank, task_id + TaskNode* source = new TaskNode(0, SOURCE_ID, 0, 3); // role, rank, task_id + TaskNode* node_a = new TaskNode(0, 0, 0, 3); // role, rank, task_id + TaskNode* sink = new TaskNode(0, SINK_ID, 0, 3); // role, rank, task_id source->AddDownstreamTask(0, 1); node_a->AddUpstreamTask(SOURCE_ID, 1); diff --git a/paddle/fluid/distributed/fleet_executor/test/source_interceptor_test.cc b/paddle/fluid/distributed/fleet_executor/test/source_interceptor_test.cc index 21a1b4accc9f1e64c6fbc4fab9a393fef6a430fe..a707650dfbc492529f37ac2a7e84f45bc1d72576 100644 --- a/paddle/fluid/distributed/fleet_executor/test/source_interceptor_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/source_interceptor_test.cc @@ -61,9 +61,8 @@ TEST(SourceInterceptor, Source) { msg_bus->Init(0, {{0, "127.0.0.0:0"}}, ""); // NOTE: don't delete, otherwise interceptor will use undefined node - TaskNode* source = - new TaskNode(0, SOURCE_ID, 0, 3, 0); // role, rank, task_id - TaskNode* node_a = new TaskNode(0, 0, 0, 3, 0); // role, rank, task_id + TaskNode* source = new TaskNode(0, SOURCE_ID, 0, 3); // role, rank, task_id + TaskNode* node_a = new TaskNode(0, 0, 0, 3); // role, rank, task_id source->AddDownstreamTask(0, 1); node_a->AddUpstreamTask(SOURCE_ID, 1); diff --git a/paddle/fluid/distributed/ps.proto b/paddle/fluid/distributed/ps.proto new file mode 100644 index 0000000000000000000000000000000000000000..5d4ab954bbdaec0b18192802787e272ecf2e6b24 --- /dev/null +++ b/paddle/fluid/distributed/ps.proto @@ -0,0 +1,265 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto2"; +package paddle; +option cc_generic_services = true; +option cc_enable_arenas=true; + +message PSParameter { + optional string worker_class = 1; + optional string server_class = 2; + optional string instance_class = 3; + optional string init_gflags = 4 [ default = "" ]; + optional WorkerParameter worker_param = 101; + optional ServerParameter server_param = 102; + repeated DownpourTrainerParameter trainer_param = 301; + optional FsClientParameter fs_client_param = 501; +} + +message WorkerParameter { + optional DownpourWorkerParameter downpour_worker_param = 1; +} + +message ServerParameter { + optional DownpourServerParameter downpour_server_param = 1; +} + +message DownpourWorkerParameter { + repeated TableParameter downpour_table_param = 1; +} + +message DownpourTrainerParameter { + repeated DenseTableParameter dense_table = 1; + repeated SparseTableParameter sparse_table = 2; + optional int32 push_sparse_per_batch = 3; + optional int32 push_dense_per_batch = 4; + repeated string skip_op = 5; + repeated ProgramConfig program_config = 6; +} + +message ProgramConfig { + required string program_id = 1; + repeated int32 push_sparse_table_id = 2; + repeated int32 push_dense_table_id = 3; + repeated int32 pull_sparse_table_id = 4; + repeated int32 pull_dense_table_id = 5; +} + +message DenseTableParameter { + optional int32 table_id = 1; + repeated string dense_variable_name = 2; + repeated string dense_gradient_variable_name = 3; + optional int32 fea_dim = 4; +} + +message SparseTableParameter { + optional int32 table_id = 1; + optional int32 feature_dim = 2; + repeated string slot_key = 3; + repeated string slot_value = 4; + repeated string slot_gradient = 5; +} + +message DownpourServerParameter { + repeated TableParameter downpour_table_param = 1; + optional ServerServiceParameter service_param = 2; +} + +message ServerServiceParameter { + optional string server_class = 1 [ default = "DownpourBrpcPsServer" ]; + optional string client_class = 2 [ default = "DownpourBrpcPsClient" ]; + optional string service_class = 3 [ default = "DownpourPsService"]; + optional uint32 start_server_port = 4 [ default = 0 ]; // will find an available port from it + optional uint32 server_thread_num = 5 [ default = 12 ]; +} + +enum TableType { + PS_SPARSE_TABLE = 0; + PS_DENSE_TABLE = 1; +} + +message TableParameter { + optional uint64 table_id = 1; + optional string table_class = 2; + optional uint64 shard_num = 3 [ default = 1000 ]; + optional TableAccessorParameter accessor = 4; + optional TableType type = 5; + optional bool compress_in_save = 6 [default = false]; + //for cache model + optional bool enable_sparse_table_cache = 7 [default = true]; + optional double sparse_table_cache_rate = 8 [default = 0.00055]; + optional uint32 sparse_table_cache_file_num = 9 [default = 16]; + optional double sparse_table_mem_cache_rate = 10 [default = 0.5]; + +} + +message TableAccessorParameter { + optional string accessor_class = 1; + optional SparseSGDRuleParameter sparse_sgd_param = 2; + optional DenseSGDRuleParameter dense_sgd_param = 3; + optional uint32 fea_dim = 4 [default = 11]; + optional uint32 embedx_dim = 5 [default = 8]; + optional uint32 embedx_threshold = 6 [default = 10]; + optional DownpourTableAccessorParameter downpour_accessor_param = 7; + repeated TableAccessorSaveParameter table_accessor_save_param = 8; + optional SparseCommonSGDRuleParameter sparse_commonsgd_param = 9; + optional SparseCommonSGDRuleParameter embed_sgd_param = 10; + optional SparseCommonSGDRuleParameter 
embedx_sgd_param = 11; +} + +message DownpourTableAccessorParameter { + optional float nonclk_coeff = 1 [default = 0.1]; // to calculate show_click_score + optional float click_coeff = 2 [default = 1]; // to calculate show_click_score + optional float base_threshold = 3 [default = 1.5]; // show_click_score > base_threshold, this feature can be saved + optional float delta_threshold = 4 [default = 0.25]; // delta_score > delta_threshold, this feature can be saved + optional float delta_keep_days = 5 [default = 16]; // unseen_day < delta_keep_days, this feature can be saved + optional float show_click_decay_rate = 6 [default = 0.98]; // show/click will update to show/click * show_click_decay_rate after a day + optional float delete_threshold = 7 [default = 0.8]; // threshold to shrink a feasign + optional float delete_after_unseen_days = 8 [default = 30]; // unseen_day > delete_after_unseen_days, this feature will be deleted in shrink_model + optional int32 ssd_unseenday_threshold = 9 [default = 1]; // threshold to save ssd +} + +message TableAccessorSaveParameter { + optional uint32 param = 1; + optional string converter = 2; + optional string deconverter = 3; +} + +enum PsCmdID { + PS_PULL_DENSE_TABLE = 0; + PS_PUSH_DENSE_TABLE = 1; + PS_PULL_SPARSE_TABLE = 2; + PS_PUSH_SPARSE_TABLE = 3; + PS_SHRINK_TABLE = 4; + PS_SAVE_ONE_TABLE = 5; + PS_SAVE_ALL_TABLE = 6; + PS_LOAD_ONE_TABLE = 7; + PS_LOAD_ALL_TABLE = 8; + PS_CLEAR_ONE_TABLE = 9; + PS_CLEAR_ALL_TABLE = 10; + PS_PUSH_DENSE_PARAM = 11; + PS_STOP_SERVER = 12; + PS_SAVE_ONE_CACHE_TABLE = 13; + PS_GET_CACHE_THRESHOLD = 14; + PS_CACHE_SHUFFLE = 15; + PS_COPY_TABLE = 16; + PS_COPY_TABLE_BY_FEASIGN = 17; + PS_PULL_SPARSE_TABLE_WITH_DEPENDENCY = 18; + PS_PUSH_SPARSE_TABLE_WITH_DEPENDENCY = 19; + PS_PRINT_TABLE_STAT = 20; + PS_SAVE_ONE_TABLE_PREFIX = 21; + PS_SAVE_MEM_CACHE_TABLE = 22; + //pserver2pserver cmd start from 100 + PS_S2S_MSG = 101; + //local_client2local_client cmd start from 200 + PS_C2C_PULL_SPARSE_TABLE = 201; +} + +message PsRequestMessage { + required uint32 cmd_id = 1; + optional uint32 table_id = 2; + repeated bytes params = 3; + optional int32 client_id = 4; + optional bytes data = 5; +}; + +message SparseSGDRuleParameter { + optional double learning_rate = 1 [default = 0.05]; + optional double initial_g2sum = 2 [default = 3.0]; + optional double initial_range = 3 [default = 0.0001]; + repeated float weight_bounds = 4; +} + +message SparseCommonSGDRuleParameter { + optional string name = 1; + optional SparseNaiveSGDRuleParameter naive = 2; + optional SparseAdagradSGDRuleParameter adagrad = 3; + optional SparseAdamSGDParameter adam = 4; +} + +message SparseNaiveSGDRuleParameter { + optional double learning_rate = 1 [default = 0.05]; + optional double initial_range = 2 [default = 0.0001]; + repeated float weight_bounds = 3; +} + +message SparseAdagradSGDRuleParameter { + optional double learning_rate = 1 [default = 0.05]; + optional double initial_g2sum = 2 [default = 3.0]; + optional double initial_range = 3 [default = 0.0001]; + repeated float weight_bounds = 4; +} + +message SparseAdamSGDParameter { + optional double learning_rate = 1 [default = 0.001]; + optional double initial_range = 2 [default = 0.0001]; + optional double beta1_decay_rate = 3 [default = 0.9]; + optional double beta2_decay_rate = 4 [default = 0.999]; + optional double ada_epsilon = 5 [default = 1e-08]; + repeated float weight_bounds = 6; +} + +message DenseSGDRuleParameter { + optional string name = 1; + optional AdamSGDParameter adam = 2; + optional 
NaiveSGDParameter naive = 3; + optional SummarySGDParameter summary = 4; + optional MovingAverageRuleParameter moving_average = 5; +} + +message AdamSGDParameter { + optional double learning_rate = 1 [default = 5e-06]; // learning rate + optional double avg_decay_rate = 2 [default = 0.999993]; // decay coefficient of avg_weight + optional double ada_decay_rate = 3 [default = 0.9999]; + optional double ada_epsilon = 4 [default = 1e-08]; + optional double mom_decay_rate = 5 [default = 0.99]; +} + +message NaiveSGDParameter { + optional double learning_rate = 1 [default = 0.0002]; + optional double avg_decay_rate = 2; +} + +message SummarySGDParameter { + optional double summary_decay_rate = 1 [default = 0.999999]; // decay coefficient of the weights +} + +message MovingAverageRuleParameter { + optional double momentum = 1; +} + +message PsResponseMessage { + required int32 err_code = 1 [default = 0]; + required string err_msg = 2 [default = ""]; + optional bytes data = 3; +}; + +service PsService { + rpc service(PsRequestMessage) returns (PsResponseMessage); +}; + +message FsClientParameter { + enum FsApiType { + HDFS = 0; + AFS = 1; + } + optional FsApiType fs_type = 1 [default = HDFS]; + optional string uri = 2; //such as afs://tianqi.afs.baidu.com:9902 + optional string user = 3; //user_name to access fs + optional string passwd = 4; //password + optional int32 buffer_size = 5; //buffer for read/write + optional string hadoop_bin = 51; + optional string afs_conf = 101; +} diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index 000c127f51456378b9c647eb5467812f8290f2e7..499eb42ea5ca3ea8dc546b0d9c278f46bed61c16 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -1839,9 +1839,9 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): False if self.composite_func_info == {} else True ) - if is_composite_grad_api: + if is_composite_grad_api and next_grad_node_creation_str != '': next_grad_node_creation_str = f""" - if (!paddle::prim::PrimCommonUtils::IsPrimEnabled()) {{ + if (!paddle::prim::PrimCommonUtils::IsBwdPrimEnabled()) {{ {next_grad_node_creation_str} }} """ @@ -1982,6 +1982,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): backward_attrs_list = self.backward_attrs_list backward_inplace_map = self.backward_inplace_map indent = GetIndent(1) + need_gen_trace_backard_for_inplace = False # Construct grad_api function args # Order: TensorWrappers, GradTensors, Attributes @@ -2211,6 +2212,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): }} else {{ {inplace_str} }}""" + need_gen_trace_backard_for_inplace = True else: inplace_for_grad_outs_str += inplace_str @@ -2259,7 +2261,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): # TODO(Ruting):using composite only when we don't have backward kernel in the future. 
elif is_composite_grad_api: grad_function_call_str = f""" - if (paddle::prim::PrimCommonUtils::IsPrimEnabled()) {{ + if (paddle::prim::PrimCommonUtils::IsBwdPrimEnabled()) {{ {indent}{composite_grad_api_namespace}{composite_grad_api_name}{composite_template_name}({composite_grad_api_args_str}); VLOG(4) << "Composite api {composite_grad_api_name} is called "; }}else{{ @@ -2282,7 +2284,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): if ( len(next_grad_node_creation_str) > 0 or is_invoke_forward_api - or inplace_for_grad_outs_str != '' + or need_gen_trace_backard_for_inplace ): compute_require_next_grad_str = f"{indent}bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;\n" diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index a7dfa61f7ef4bd83a24b76d8b23ad6b961683e83..55ad228470cdfe37de4a148cf7112ec74d4a4ae6 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -618,7 +618,8 @@ if(WITH_PYTHON) fleet_proto_init pass_desc_py_proto ps_py_proto - ps_py_proto_init) + ps_py_proto_init + pslib_py_proto) if(NOT WIN32) add_custom_command( TARGET framework_py_proto diff --git a/paddle/fluid/framework/ir/fused_attention_pass.cc b/paddle/fluid/framework/ir/fused_attention_pass.cc index 771bf958d21683c53852fa7cfc4d57edc358347b..72fa90db9b15a920d3ad4ee33d80d8e9f84455dd 100644 --- a/paddle/fluid/framework/ir/fused_attention_pass.cc +++ b/paddle/fluid/framework/ir/fused_attention_pass.cc @@ -327,8 +327,441 @@ PDNode* FusedAttentionGradPattern::operator()(PDNode* x, bool has_attn_mask, bool do_dropout, bool add_residual) { - // TODO(Yuang Liu): finish the backward pattern - return nullptr; + // post layer norm + PDNode* post_layer_norm_grad_out_node{nullptr}; + if (post_layer_norm) { + auto* post_layer_norm_grad_node = + pattern->NewNode(post_layer_norm_grad_op_repr()) + ->assert_is_op("layer_norm_grad"); + auto* post_layer_norm_grad_bias_node = + pattern->NewNode(post_layer_norm_grad_bias_repr()) + ->assert_is_op_input("layer_norm_grad", "Bias"); + auto* post_layer_norm_grad_scale_node = + pattern->NewNode(post_layer_norm_grad_scale_repr()) + ->assert_is_op_input("layer_norm_grad", "Scale"); + auto* post_layer_norm_grad_mean_node = + pattern->NewNode(post_layer_norm_grad_mean_repr()) + ->assert_is_op_input("layer_norm_grad", "Mean"); + auto* post_layer_norm_grad_variance_node = + pattern->NewNode(post_layer_norm_grad_variance_repr()) + ->assert_is_op_input("layer_norm_grad", "Variance"); + auto* post_layer_norm_grad_x_node = + pattern->NewNode(post_layer_norm_grad_x_repr()) + ->assert_is_op_input("layer_norm_grad", "X"); + post_layer_norm_grad_out_node = + pattern->NewNode(post_layer_norm_grad_x_grad_repr()) + ->assert_is_op_output("layer_norm_grad", "X@GRAD"); + auto* post_layer_norm_grad_bias_grad_node = + pattern->NewNode(post_layer_norm_grad_bias_grad_repr()) + ->assert_is_op_output("layer_norm_grad", "Bias@GRAD"); + auto* post_layer_norm_grad_scale_grad_node = + pattern->NewNode(post_layer_norm_grad_scale_grad_repr()) + ->assert_is_op_output("layer_norm_grad", "Scale@GRAD"); + post_layer_norm_grad_node + ->LinksFrom({x, + post_layer_norm_grad_bias_node, + post_layer_norm_grad_scale_node, + post_layer_norm_grad_mean_node, + post_layer_norm_grad_variance_node, + post_layer_norm_grad_x_node}) + .LinksTo({post_layer_norm_grad_out_node, + post_layer_norm_grad_bias_grad_node, + post_layer_norm_grad_scale_grad_node}); + } + + // add residual + PDNode* 
residual_ele_add_grad_out_node{nullptr}; + PDNode* residual_ele_add_grad_x_node{nullptr}; + PDNode* residual_ele_add_grad_x_grad_node{nullptr}; + if (add_residual) { + PDNode* ele_add_grad_input = x; + if (post_layer_norm) { + ele_add_grad_input = post_layer_norm_grad_out_node; + } + auto* residual_ele_add_grad_node = + pattern->NewNode(residual_ele_add_grad_op_repr()) + ->assert_is_op("elementwise_add_grad"); + residual_ele_add_grad_x_node = + pattern->NewNode(residual_ele_add_grad_x_repr()) + ->assert_is_op_input("elementwise_add_grad", "X"); + auto* residual_ele_add_grad_bias_node = + pattern->NewNode(residual_ele_add_grad_bias_repr()) + ->assert_is_op_input("elementwise_add_grad", "Y"); + residual_ele_add_grad_out_node = + pattern->NewNode(residual_ele_add_grad_bias_grad_repr()) + ->assert_is_op_output("elementwise_add_grad", "Y@GRAD"); + residual_ele_add_grad_x_grad_node = + pattern->NewNode(residual_ele_add_grad_x_grad_repr()) + ->assert_is_op_output("elementwise_add_grad", "X@GRAD"); + ele_add_grad_input->assert_is_op_input("elementwise_add_grad", "Out@GRAD"); + residual_ele_add_grad_node + ->LinksFrom({ele_add_grad_input, + residual_ele_add_grad_x_node, + residual_ele_add_grad_bias_node}) + .LinksTo({residual_ele_add_grad_x_grad_node, + residual_ele_add_grad_out_node}); + } + + // get the real input x for dropout grad + PDNode* out_linear_grad_input_node = x; + if (post_layer_norm && !add_residual) { + out_linear_grad_input_node = post_layer_norm_grad_out_node; + } else if (add_residual) { + out_linear_grad_input_node = residual_ele_add_grad_out_node; + } + + // out linear part + auto* out_linear_dropout_grad_node = + pattern->NewNode(out_linear_dropout_grad_op_repr()) + ->assert_is_op("dropout_grad"); + auto* out_linear_dropout_grad_mask_node = + pattern->NewNode(out_linear_dropout_grad_mask_repr()) + ->assert_is_op_input("dropout_grad", "Mask"); + auto* out_linear_dropout_grad_out_node = + pattern->NewNode(out_linear_dropout_grad_out_repr()) + ->assert_is_op_output("dropout_grad", "X@GRAD"); + out_linear_grad_input_node->assert_is_op_input("dropout_grad", "Out@GRAD"); + out_linear_dropout_grad_node + ->LinksFrom( + {out_linear_grad_input_node, out_linear_dropout_grad_mask_node}) + .LinksTo({out_linear_dropout_grad_out_node}); + + auto* out_linear_ele_add_grad_node = + pattern->NewNode(out_linear_ele_add_grad_op_repr()) + ->assert_is_op("elementwise_add_grad"); + auto* out_linear_ele_add_grad_x_node = + pattern->NewNode(out_linear_ele_add_grad_x_repr()) + ->assert_is_op_input("elementwise_add_grad", "X"); + auto* out_linear_ele_add_grad_bias_node = + pattern->NewNode(out_linear_ele_add_grad_bias_repr()) + ->assert_is_op_input("elementwise_add_grad", "Y"); + auto* out_linear_ele_add_grad_x_grad_node = + pattern->NewNode(out_linear_ele_add_grad_x_grad_repr()) + ->assert_is_op_output("elementwise_add_grad", "X@GRAD"); + auto* out_linear_ele_add_grad_bias_grad_node = + pattern->NewNode(out_linear_ele_add_grad_bias_grad_repr()) + ->assert_is_op_output("elementwise_add_grad", "Y@GRAD"); + out_linear_dropout_grad_out_node->assert_is_op_input("elementwise_add_grad", + "Out@GRAD"); + out_linear_ele_add_grad_node + ->LinksFrom({out_linear_dropout_grad_out_node, + out_linear_ele_add_grad_x_node, + out_linear_ele_add_grad_bias_node}) + .LinksTo({out_linear_ele_add_grad_x_grad_node, + out_linear_ele_add_grad_bias_grad_node}); + + auto* out_linear_matmul_grad_node = + pattern->NewNode(out_linear_matmul_grad_op_repr()) + ->assert_is_op("matmul_v2_grad"); + auto* out_linear_matmul_grad_x_node = + 
pattern->NewNode(out_linear_matmul_grad_x_repr()) + ->assert_is_op_input("matmul_v2_grad", "X"); + auto* out_linear_matmul_grad_w_node = + pattern->NewNode(out_linear_matmul_grad_w_repr()) + ->assert_is_op_input("matmul_v2_grad", "Y"); + auto* out_linear_matmul_grad_x_grad_node = + pattern->NewNode(out_linear_matmul_grad_x_grad_repr()) + ->assert_is_op_output("matmul_v2_grad", "X@GRAD"); + auto* out_linear_matmul_grad_w_grad_node = + pattern->NewNode(out_linear_matmul_grad_w_grad_repr()) + ->assert_is_op_output("matmul_v2_grad", "Y@GRAD"); + out_linear_ele_add_grad_x_grad_node->assert_is_op_input("matmul_v2_grad", + "Out@GRAD"); + out_linear_matmul_grad_node + ->LinksFrom({out_linear_ele_add_grad_x_grad_node, + out_linear_matmul_grad_x_node, + out_linear_matmul_grad_w_node}) + .LinksTo({out_linear_matmul_grad_x_grad_node, + out_linear_matmul_grad_w_grad_node}); + + // core attention part + auto* qkv_reshape_grad_node = pattern->NewNode(qkv_reshape_grad_op_repr()) + ->assert_is_op("reshape2_grad"); + auto* qkv_reshape_grad_x_shape_node = + pattern->NewNode(qkv_reshape_grad_x_shape_repr()) + ->assert_is_op_input("reshape2_grad", "XShape"); + auto* qkv_reshape_grad_out_node = + pattern->NewNode(qkv_reshape_grad_out_repr()) + ->assert_is_op_output("reshape2_grad", "X@GRAD"); + out_linear_matmul_grad_x_grad_node->assert_is_op_input("reshape2_grad", + "Out@GRAD"); + qkv_reshape_grad_node + ->LinksFrom( + {out_linear_matmul_grad_x_grad_node, qkv_reshape_grad_x_shape_node}) + .LinksTo({qkv_reshape_grad_out_node}); + + auto* qkv_transpose_grad_node = pattern->NewNode(qkv_transpose_grad_op_repr()) + ->assert_is_op("transpose2_grad"); + auto* qkv_transpose_grad_x_shape_node = + pattern->NewNode(qkv_transpose_grad_x_shape_repr()) + ->assert_is_op_input("transpose2_grad", "XShape"); + auto* qkv_transpose_grad_out_node = + pattern->NewNode(qkv_transpose_grad_out_repr()) + ->assert_is_op_output("transpose2_grad", "X@GRAD"); + qkv_reshape_grad_out_node->assert_is_op_input("transpose2_grad", "Out@GRAD"); + qkv_transpose_grad_node + ->LinksFrom({qkv_reshape_grad_out_node, qkv_transpose_grad_x_shape_node}) + .LinksTo({qkv_transpose_grad_out_node}); + + auto* qkv_matmul_grad_node = pattern->NewNode(qkv_matmul_grad_op_repr()) + ->assert_is_op("matmul_v2_grad"); + auto* qkv_matmul_grad_x_node = + pattern->NewNode(qkv_matmul_grad_x_repr()) + ->assert_is_op_input("matmul_v2_grad", "X"); + auto* qkv_matmul_grad_w_node = + pattern->NewNode(qkv_matmul_grad_w_repr()) + ->assert_is_op_input("matmul_v2_grad", "Y"); + auto* qkv_matmul_grad_x_grad_node = + pattern->NewNode(qkv_matmul_grad_x_grad_repr()) + ->assert_is_op_output("matmul_v2_grad", "X@GRAD"); + auto* qkv_matmul_grad_w_grad_node = + pattern->NewNode(qkv_matmul_grad_w_grad_repr()) + ->assert_is_op_output("matmul_v2_grad", "Y@GRAD"); + qkv_transpose_grad_out_node->assert_is_op_input("matmul_v2_grad", "Out@GRAD"); + qkv_matmul_grad_node + ->LinksFrom({qkv_transpose_grad_out_node, + qkv_matmul_grad_x_node, + qkv_matmul_grad_w_node}) + .LinksTo({qkv_matmul_grad_x_grad_node, qkv_matmul_grad_w_grad_node}); + + PDNode* attn_dropout_grad_out_node{nullptr}; + if (do_dropout) { + auto* attn_dropout_grad_node = pattern->NewNode(attn_dropout_grad_op_repr()) + ->assert_is_op("dropout_grad"); + auto* attn_dropout_grad_mask_node = + pattern->NewNode(attn_dropout_grad_mask_repr()) + ->assert_is_op_input("dropout_grad", "Mask"); + attn_dropout_grad_out_node = + pattern->NewNode(attn_dropout_grad_out_repr()) + ->assert_is_op_output("dropout_grad", "X@GRAD"); + 
qkv_matmul_grad_x_grad_node->assert_is_op_input("dropout_grad", "Out@GRAD"); + attn_dropout_grad_node + ->LinksFrom({qkv_matmul_grad_x_grad_node, attn_dropout_grad_mask_node}) + .LinksTo({attn_dropout_grad_out_node}); + } + + PDNode* qk_softmax_grad_input_node = + do_dropout ? attn_dropout_grad_out_node : qkv_matmul_grad_x_grad_node; + auto* qk_softmax_grad_node = + pattern->NewNode(qk_softmax_grad_op_repr())->assert_is_op("softmax_grad"); + auto* qk_softmax_grad_fwd_out_node = + pattern->NewNode(qk_softmax_grad_fwd_out_repr()) + ->assert_is_op_input("softmax_grad", "Out"); + auto* qk_softmax_grad_out = + pattern->NewNode(qk_softmax_grad_out_repr()) + ->assert_is_op_output("softmax_grad", "X@GRAD"); + qk_softmax_grad_input_node->assert_is_op_input("softmax_grad", "Out@GRAD"); + qk_softmax_grad_node + ->LinksFrom({qk_softmax_grad_input_node, qk_softmax_grad_fwd_out_node}) + .LinksTo({qk_softmax_grad_out}); + + PDNode* add_mask_ele_add_grad_x_grad_node{nullptr}; + if (has_attn_mask) { + auto* add_mask_ele_add_grad_node = + pattern->NewNode(add_mask_ele_add_grad_op_repr()) + ->assert_is_op("elementwise_add_grad"); + auto* add_mask_ele_add_grad_x_node = + pattern->NewNode(add_mask_ele_add_grad_x_repr()) + ->assert_is_op_input("elementwise_add_grad", "X"); + auto* add_mask_ele_add_grad_bias_node = + pattern->NewNode(add_mask_ele_add_grad_bias_repr()) + ->assert_is_op_input("elementwise_add_grad", "Y"); + add_mask_ele_add_grad_x_grad_node = + pattern->NewNode(add_mask_ele_add_grad_x_grad_repr()) + ->assert_is_op_output("elementwise_add_grad", "X@GRAD"); + qk_softmax_grad_out->assert_is_op_input("elementwise_add_grad", "Out@GRAD"); + add_mask_ele_add_grad_node + ->LinksFrom({add_mask_ele_add_grad_x_node, + add_mask_ele_add_grad_bias_node, + qk_softmax_grad_out}) + .LinksTo({add_mask_ele_add_grad_x_grad_node}); + } + + PDNode* qk_scale_grad_input_node = + has_attn_mask ? 
add_mask_ele_add_grad_x_grad_node : qk_softmax_grad_out; + auto* qk_scale_grad_node = + pattern->NewNode(qk_scale_grad_op_repr())->assert_is_op("scale"); + auto* qk_scale_grad_out_node = + pattern->NewNode(qk_scale_grad_out_repr())->assert_is_op_output("scale"); + qk_scale_grad_input_node->assert_is_op_input("scale", "X"); + qk_scale_grad_node->LinksFrom({qk_scale_grad_input_node}) + .LinksTo({qk_scale_grad_out_node}); + + auto* qk_matmul_grad_node = pattern->NewNode(qk_matmul_grad_op_repr()) + ->assert_is_op("matmul_v2_grad"); + auto* qk_matmul_grad_x_node = pattern->NewNode(qk_matmul_grad_x_repr()) + ->assert_is_op_input("matmul_v2_grad", "X"); + auto* qk_matmul_grad_w_node = pattern->NewNode(qk_matmul_grad_w_repr()) + ->assert_is_op_input("matmul_v2_grad", "Y"); + auto* qk_matmul_grad_x_grad_node = + pattern->NewNode(qk_matmul_grad_x_grad_repr()) + ->assert_is_op_output("matmul_v2_grad", "X@GRAD"); + auto* qk_matmul_grad_w_grad_node = + pattern->NewNode(qk_matmul_grad_w_grad_repr()) + ->assert_is_op_output("matmul_v2_grad", "Y@GRAD"); + qk_scale_grad_out_node->assert_is_op_input("matmul_v2_grad", "Out@GRAD"); + qk_matmul_grad_node + ->LinksFrom({qk_scale_grad_out_node, + qk_matmul_grad_x_node, + qk_matmul_grad_w_node}) + .LinksTo({qk_matmul_grad_x_grad_node, qk_matmul_grad_w_grad_node}); + + // fuse qkv projection + auto* fuse_qkv_split_grad_node = + pattern->NewNode(fuse_qkv_split_grad_op_repr())->assert_is_op("concat"); + auto* fuse_qkv_split_grad_out_node = + pattern->NewNode(fuse_qkv_split_grad_out_repr()) + ->assert_is_op_output("concat"); + qk_matmul_grad_x_grad_node->assert_is_op_input("concat"); // q grad + qk_matmul_grad_w_grad_node->assert_is_op_input("concat"); // k grad + qkv_matmul_grad_w_grad_node->assert_is_op_input("concat"); // v grad + fuse_qkv_split_grad_node + ->LinksFrom({qk_matmul_grad_x_grad_node, + qk_matmul_grad_w_grad_node, + qkv_matmul_grad_w_grad_node}) + .LinksTo({fuse_qkv_split_grad_out_node}); + + auto* fuse_qkv_transpose_grad_node = + pattern->NewNode(fuse_qkv_transpose_grad_op_repr()) + ->assert_is_op("transpose2_grad"); + auto* fuse_qkv_transpose_grad_x_shape_node = + pattern->NewNode(fuse_qkv_transpose_grad_x_shape_repr()) + ->assert_is_op_input("transpose2_grad", "XShape"); + auto* fuse_qkv_transpose_grad_out_node = + pattern->NewNode(fuse_qkv_transpose_grad_out_repr()) + ->assert_is_op_output("transpose2_grad", "X@GRAD"); + fuse_qkv_split_grad_out_node->assert_is_op_input("transpose2_grad", + "Out@GRAD"); + fuse_qkv_transpose_grad_node + ->LinksFrom( + {fuse_qkv_split_grad_out_node, fuse_qkv_transpose_grad_x_shape_node}) + .LinksTo({fuse_qkv_transpose_grad_out_node}); + + auto* fuse_qkv_reshape_grad_node = + pattern->NewNode(fuse_qkv_reshape_grad_op_repr()) + ->assert_is_op("reshape2_grad"); + auto* fuse_qkv_reshape_grad_x_shape_node = + pattern->NewNode(fuse_qkv_reshape_grad_x_shape_repr()) + ->assert_is_op_input("reshape2_grad", "XShape"); + auto* fuse_qkv_reshape_grad_out_node = + pattern->NewNode(fuse_qkv_reshape_grad_out_repr()) + ->assert_is_op_output("reshape2_grad", "X@GRAD"); + fuse_qkv_transpose_grad_out_node->assert_is_op_input("reshape2_grad", + "Out@GRAD"); + fuse_qkv_reshape_grad_node + ->LinksFrom({fuse_qkv_transpose_grad_out_node, + fuse_qkv_reshape_grad_x_shape_node}) + .LinksTo({fuse_qkv_reshape_grad_out_node}); + + auto* fuse_qkv_ele_add_grad_node = + pattern->NewNode(fuse_qkv_ele_add_grad_op_repr()) + ->assert_is_op("elementwise_add_grad"); + auto* fuse_qkv_ele_add_grad_x_node = + pattern->NewNode(fuse_qkv_ele_add_grad_x_repr()) + 
->assert_is_op_input("elementwise_add_grad", "X"); + auto* fuse_qkv_ele_add_grad_bias_node = + pattern->NewNode(fuse_qkv_ele_add_grad_bias_repr()) + ->assert_is_op_input("elementwise_add_grad", "Y"); + auto* fuse_qkv_ele_add_grad_x_grad_node = + pattern->NewNode(fuse_qkv_ele_add_grad_x_grad_repr()) + ->assert_is_op_output("elementwise_add_grad", "X@GRAD"); + auto* fuse_qkv_ele_add_grad_bias_grad_node = + pattern->NewNode(fuse_qkv_ele_add_grad_bias_grad_repr()) + ->assert_is_op_output("elementwise_add_grad", "Y@GRAD"); + fuse_qkv_reshape_grad_out_node->assert_is_op_input("elementwise_add_grad", + "Out@GRAD"); + fuse_qkv_ele_add_grad_node + ->LinksFrom({fuse_qkv_reshape_grad_out_node, + fuse_qkv_ele_add_grad_x_node, + fuse_qkv_ele_add_grad_bias_node}) + .LinksTo({fuse_qkv_ele_add_grad_x_grad_node, + fuse_qkv_ele_add_grad_bias_grad_node}); + + auto* fuse_qkv_matmul_grad_node = + pattern->NewNode(fuse_qkv_matmul_grad_op_repr()) + ->assert_is_op("matmul_v2_grad"); + auto* fuse_qkv_matmul_grad_x_node = + pattern->NewNode(fuse_qkv_matmul_grad_x_repr()) + ->assert_is_op_input("matmul_v2_grad", "X"); + auto* fuse_qkv_matmul_grad_w_node = + pattern->NewNode(fuse_qkv_matmul_grad_w_repr()) + ->assert_is_op_input("matmul_v2_grad", "Y"); + auto* fuse_qkv_matmul_grad_x_grad_node = + pattern->NewNode(fuse_qkv_matmul_grad_x_grad_repr()) + ->assert_is_op_output("matmul_v2_grad", "X@GRAD"); + auto* fuse_qkv_matmul_grad_w_grad_node = + pattern->NewNode(fuse_qkv_matmul_grad_w_grad_repr()) + ->assert_is_op_output("matmul_v2_grad", "Y@GRAD"); + fuse_qkv_ele_add_grad_x_grad_node->assert_is_op_input("matmul_v2_grad", + "Out@GRAD"); + fuse_qkv_matmul_grad_node + ->LinksFrom({fuse_qkv_ele_add_grad_x_grad_node, + fuse_qkv_matmul_grad_x_node, + fuse_qkv_matmul_grad_w_node}) + .LinksTo( + {fuse_qkv_matmul_grad_x_grad_node, fuse_qkv_matmul_grad_w_grad_node}); + + if (!pre_layer_norm) { + return fuse_qkv_matmul_grad_x_grad_node; + } + + // pre layer norm + auto* pre_layer_norm_grad_node = + pattern->NewNode(pre_layer_norm_grad_op_repr()) + ->assert_is_op("layer_norm_grad"); + auto* pre_layer_norm_grad_scale_node = + pattern->NewNode(pre_layer_norm_grad_scale_repr()) + ->assert_is_op_input("layer_norm_grad", "Scale"); + auto* pre_layer_norm_grad_bias_node = + pattern->NewNode(pre_layer_norm_grad_bias_repr()) + ->assert_is_op_input("layer_norm_grad", "Bias"); + auto* pre_layer_norm_grad_mean_node = + pattern->NewNode(pre_layer_norm_grad_mean_repr()) + ->assert_is_op_input("layer_norm_grad", "Mean"); + auto* pre_layer_norm_grad_variance_node = + pattern->NewNode(pre_layer_norm_grad_variance_repr()) + ->assert_is_op_input("layer_norm_grad", "Variance"); + auto* pre_layer_norm_grad_x_node = + add_residual ? 
residual_ele_add_grad_x_node + : pattern->NewNode(pre_layer_norm_grad_x_repr()) + ->assert_is_op_input("layer_norm_grad", "X"); + auto* pre_layer_norm_grad_scale_grad_node = + pattern->NewNode(pre_layer_norm_grad_scale_grad_repr()) + ->assert_is_op_output("layer_norm_grad", "Scale@GRAD"); + auto* pre_layer_norm_grad_bias_grad_node = + pattern->NewNode(pre_layer_norm_grad_bias_grad_repr()) + ->assert_is_op_output("layer_norm_grad", "Bias@GRAD"); + auto* pre_layer_norm_grad_x_grad_node = + pattern->NewNode(pre_layer_norm_grad_x_grad_repr()) + ->assert_is_op_output("layer_norm_grad", "X@GRAD"); + fuse_qkv_matmul_grad_x_grad_node->assert_is_op_input("layer_norm_grad", + "Y@GRAD"); + pre_layer_norm_grad_node + ->LinksFrom({fuse_qkv_matmul_grad_x_grad_node, + pre_layer_norm_grad_scale_node, + pre_layer_norm_grad_bias_node, + pre_layer_norm_grad_mean_node, + pre_layer_norm_grad_variance_node, + pre_layer_norm_grad_x_node}) + .LinksTo({pre_layer_norm_grad_scale_grad_node, + pre_layer_norm_grad_bias_grad_node, + pre_layer_norm_grad_x_grad_node}); + + if (!add_residual) { + return pre_layer_norm_grad_x_grad_node; + } + + auto* grad_accumulation_sum_node = + pattern->NewNode(grad_accumulation_sum_op_repr())->assert_is_op("sum"); + auto* grad_accumulation_sum_out_node = + pattern->NewNode(grad_accumulation_out_repr()) + ->assert_is_op_output("sum"); + grad_accumulation_sum_node + ->LinksFrom( + {pre_layer_norm_grad_x_grad_node, residual_ele_add_grad_x_grad_node}) + .LinksTo({grad_accumulation_sum_out_node}); + + return grad_accumulation_sum_out_node; } } // namespace patterns @@ -437,7 +870,107 @@ ir::Graph* FusedAttentionsPass::PreMaskDropResPostFwd(Graph* graph) const { } ir::Graph* FusedAttentionsPass::PreMaskDropResPostBwd(Graph* graph) const { - // TODO(Yuang Liu): finish the pass + GraphPatternDetector gpd; + auto* x = gpd.mutable_pattern() + ->NewNode(patterns::PDNodeName(name_scope_, "x")) + ->AsInput() + ->assert_is_op_input("layer_norm_grad", "Y@GRAD"); + patterns::FusedAttentionGradPattern fused_attention_grad_pattern( + gpd.mutable_pattern(), "fused_attention_grad_pattern"); + + fused_attention_grad_pattern(x, + /* pre_layer_norm */ true, + /* post_layer_norm */ true, + /* has_attn_mask */ true, + /* do_dropout */ true, + /* add_residual */ true); + + int found_fused_attention = 0; + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + VLOG(3) << "handle FusedMultiHeadAttention backward pass's fusion"; + + GET_IR_NODE_FROM_SUBGRAPH(post_layer_norm_grad_op_node, + post_layer_norm_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(residual_ele_add_grad_op_node, + residual_ele_add_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(out_linear_dropout_grad_op_node, + out_linear_dropout_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(out_linear_ele_add_grad_op_node, + out_linear_ele_add_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(out_linear_matmul_grad_op_node, + out_linear_matmul_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(qkv_reshape_grad_op_node, + qkv_reshape_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(qkv_transpose_grad_op_node, + qkv_transpose_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(qkv_matmul_grad_op_node, + qkv_matmul_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(attn_dropout_grad_op_node, + attn_dropout_grad_op, + fused_attention_grad_pattern); + 
GET_IR_NODE_FROM_SUBGRAPH(qk_softmax_grad_op_node, + qk_softmax_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(add_mask_ele_add_grad_op_node, + add_mask_ele_add_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + qk_scale_grad_op_node, qk_scale_grad_op, fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(qk_matmul_grad_op_node, + qk_matmul_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(fuse_qkv_split_grad_op_node, + fuse_qkv_split_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(fuse_qkv_transpose_grad_op_node, + fuse_qkv_transpose_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(fuse_qkv_reshape_grad_op_node, + fuse_qkv_reshape_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(fuse_qkv_ele_add_grad_op_node, + fuse_qkv_ele_add_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(fuse_qkv_matmul_grad_op_node, + fuse_qkv_matmul_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(pre_layer_norm_grad_op_node, + pre_layer_norm_grad_op, + fused_attention_grad_pattern); + GET_IR_NODE_FROM_SUBGRAPH(grad_accumulation_sum_op_node, + grad_accumulation_sum_op, + fused_attention_grad_pattern); + + // TODO(Yuang Liu): finish the handler + + GraphSafeRemoveNodes( + g, {post_layer_norm_grad_op_node, residual_ele_add_grad_op_node, + out_linear_dropout_grad_op_node, out_linear_ele_add_grad_op_node, + out_linear_matmul_grad_op_node, qkv_reshape_grad_op_node, + qkv_transpose_grad_op_node, qkv_matmul_grad_op_node, + attn_dropout_grad_op_node, qk_softmax_grad_op_node, + add_mask_ele_add_grad_op_node, qk_scale_grad_op_node, + qk_matmul_grad_op_node, fuse_qkv_split_grad_op_node, + fuse_qkv_transpose_grad_op_node, fuse_qkv_reshape_grad_op_node, + fuse_qkv_ele_add_grad_op_node, fuse_qkv_matmul_grad_op_node, + pre_layer_norm_grad_op_node, grad_accumulation_sum_op_node}); + + found_fused_attention++; + }; + + gpd(graph, handler); + AddStatis(found_fused_attention); + return graph; } diff --git a/paddle/fluid/framework/ir/fused_attention_pass.h b/paddle/fluid/framework/ir/fused_attention_pass.h index 5ec1aac41ec9b8b289d314e2e79a87429c164bf8..d360f7f6520d102a4331b165880d1ba996dd93dc 100644 --- a/paddle/fluid/framework/ir/fused_attention_pass.h +++ b/paddle/fluid/framework/ir/fused_attention_pass.h @@ -140,7 +140,116 @@ struct FusedAttentionGradPattern : public PatternBase { bool do_dropout, // dropout the softmax(qk) or not bool add_residual); // add residual to out linear or not - // TODO(Yuang Liu): add backward pattern + // post layer norm grad + PATTERN_DECL_NODE(post_layer_norm_grad_op); + PATTERN_DECL_NODE(post_layer_norm_grad_scale); + PATTERN_DECL_NODE(post_layer_norm_grad_bias); + PATTERN_DECL_NODE(post_layer_norm_grad_mean); + PATTERN_DECL_NODE(post_layer_norm_grad_variance); + PATTERN_DECL_NODE(post_layer_norm_grad_x); + PATTERN_DECL_NODE(post_layer_norm_grad_scale_grad); + PATTERN_DECL_NODE(post_layer_norm_grad_bias_grad); + PATTERN_DECL_NODE(post_layer_norm_grad_x_grad); + + // residual grad + PATTERN_DECL_NODE(residual_ele_add_grad_op); + PATTERN_DECL_NODE(residual_ele_add_grad_x); + PATTERN_DECL_NODE(residual_ele_add_grad_bias); + PATTERN_DECL_NODE(residual_ele_add_grad_bias_grad); + PATTERN_DECL_NODE(residual_ele_add_grad_x_grad); + + // out linear grad + PATTERN_DECL_NODE(out_linear_dropout_grad_op); + PATTERN_DECL_NODE(out_linear_dropout_grad_mask); + PATTERN_DECL_NODE(out_linear_dropout_grad_out); + + 
PATTERN_DECL_NODE(out_linear_ele_add_grad_op); + PATTERN_DECL_NODE(out_linear_ele_add_grad_x); + PATTERN_DECL_NODE(out_linear_ele_add_grad_bias); + PATTERN_DECL_NODE(out_linear_ele_add_grad_x_grad); + PATTERN_DECL_NODE(out_linear_ele_add_grad_bias_grad); + + PATTERN_DECL_NODE(out_linear_matmul_grad_op); + PATTERN_DECL_NODE(out_linear_matmul_grad_x); + PATTERN_DECL_NODE(out_linear_matmul_grad_w); + PATTERN_DECL_NODE(out_linear_matmul_grad_x_grad); + PATTERN_DECL_NODE(out_linear_matmul_grad_w_grad); + + // core attention grad + PATTERN_DECL_NODE(qkv_reshape_grad_op); + PATTERN_DECL_NODE(qkv_reshape_grad_x_shape); + PATTERN_DECL_NODE(qkv_reshape_grad_out); + + PATTERN_DECL_NODE(qkv_transpose_grad_op); + PATTERN_DECL_NODE(qkv_transpose_grad_x_shape); + PATTERN_DECL_NODE(qkv_transpose_grad_out); + + PATTERN_DECL_NODE(qkv_matmul_grad_op); + PATTERN_DECL_NODE(qkv_matmul_grad_x); + PATTERN_DECL_NODE(qkv_matmul_grad_w); + PATTERN_DECL_NODE(qkv_matmul_grad_x_grad); + PATTERN_DECL_NODE(qkv_matmul_grad_w_grad); + + PATTERN_DECL_NODE(attn_dropout_grad_op); + PATTERN_DECL_NODE(attn_dropout_grad_mask); + PATTERN_DECL_NODE(attn_dropout_grad_out); + + PATTERN_DECL_NODE(qk_softmax_grad_op); + PATTERN_DECL_NODE(qk_softmax_grad_fwd_out); + PATTERN_DECL_NODE(qk_softmax_grad_out); + + PATTERN_DECL_NODE(add_mask_ele_add_grad_op); + PATTERN_DECL_NODE(add_mask_ele_add_grad_x); + PATTERN_DECL_NODE(add_mask_ele_add_grad_bias); + PATTERN_DECL_NODE(add_mask_ele_add_grad_x_grad); + + PATTERN_DECL_NODE(qk_scale_grad_op); + PATTERN_DECL_NODE(qk_scale_grad_out); + + PATTERN_DECL_NODE(qk_matmul_grad_op); + PATTERN_DECL_NODE(qk_matmul_grad_x); + PATTERN_DECL_NODE(qk_matmul_grad_w); + PATTERN_DECL_NODE(qk_matmul_grad_x_grad); + PATTERN_DECL_NODE(qk_matmul_grad_w_grad); + + // fuse qkv projection grad + PATTERN_DECL_NODE(fuse_qkv_split_grad_op); // concat op + PATTERN_DECL_NODE(fuse_qkv_split_grad_out); + + PATTERN_DECL_NODE(fuse_qkv_transpose_grad_op); + PATTERN_DECL_NODE(fuse_qkv_transpose_grad_x_shape); + PATTERN_DECL_NODE(fuse_qkv_transpose_grad_out); + + PATTERN_DECL_NODE(fuse_qkv_reshape_grad_op); + PATTERN_DECL_NODE(fuse_qkv_reshape_grad_x_shape); + PATTERN_DECL_NODE(fuse_qkv_reshape_grad_out); + + PATTERN_DECL_NODE(fuse_qkv_ele_add_grad_op); + PATTERN_DECL_NODE(fuse_qkv_ele_add_grad_x); + PATTERN_DECL_NODE(fuse_qkv_ele_add_grad_bias); + PATTERN_DECL_NODE(fuse_qkv_ele_add_grad_x_grad); + PATTERN_DECL_NODE(fuse_qkv_ele_add_grad_bias_grad); + + PATTERN_DECL_NODE(fuse_qkv_matmul_grad_op); + PATTERN_DECL_NODE(fuse_qkv_matmul_grad_x); + PATTERN_DECL_NODE(fuse_qkv_matmul_grad_w); + PATTERN_DECL_NODE(fuse_qkv_matmul_grad_x_grad); + PATTERN_DECL_NODE(fuse_qkv_matmul_grad_w_grad); + + // pre layer norm grad + PATTERN_DECL_NODE(pre_layer_norm_grad_op); + PATTERN_DECL_NODE(pre_layer_norm_grad_scale); + PATTERN_DECL_NODE(pre_layer_norm_grad_bias); + PATTERN_DECL_NODE(pre_layer_norm_grad_mean); + PATTERN_DECL_NODE(pre_layer_norm_grad_variance); + PATTERN_DECL_NODE(pre_layer_norm_grad_x); + PATTERN_DECL_NODE(pre_layer_norm_grad_scale_grad); + PATTERN_DECL_NODE(pre_layer_norm_grad_bias_grad); + PATTERN_DECL_NODE(pre_layer_norm_grad_x_grad); + + // grad accumulation + PATTERN_DECL_NODE(grad_accumulation_sum_op); + PATTERN_DECL_NODE(grad_accumulation_out); }; } // namespace patterns diff --git a/paddle/fluid/framework/ir/map_op_to_another_pass.cc b/paddle/fluid/framework/ir/map_op_to_another_pass.cc old mode 100644 new mode 100755 index 2ae3216a44bd59094cb89cc60916367a97fee269..b8899ae751d81472f721ca1069f7a24cfedce1dc --- 
a/paddle/fluid/framework/ir/map_op_to_another_pass.cc +++ b/paddle/fluid/framework/ir/map_op_to_another_pass.cc @@ -53,8 +53,13 @@ void MapOp2AnotherPass::ApplyImpl(ir::Graph* graph) const { op_desc->SetAttr("shape", std::vector{0, -1}); } } else if (op_type == "depthwise_conv2d") { - op_desc->SetType(replaced_map[op_type]); - op_desc->SetAttr("use_cudnn", true); + auto groups = PADDLE_GET_CONST(int, op_desc->GetAttr("groups")); + if (groups > 1) { +#if CUDNN_VERSION >= 8100 + op_desc->SetType(replaced_map[op_type]); + op_desc->SetAttr("use_cudnn", true); +#endif + } } op_desc->Flush(); ++found_count; diff --git a/paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..64232a2b7259b372c9d502cd4801f967f06056bf --- /dev/null +++ b/paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h @@ -0,0 +1,87 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/fluid/framework/op_desc.h" + +namespace paddle { +namespace framework { +namespace ir { + +inline std::vector GetSupportedActivations() { + return std::vector{"abs", + "clip", + "gelu", + "hard_sigmoid", + "hard_swish", + "leaky_relu", + "mish", + "relu", + "relu6", + "sigmoid", + "sqrt", + "swish", + "tanh"}; +} + +inline std::unordered_map GetAttributeMap( + std::string act_type) { + std::unordered_map attr_map; + if (act_type == "swish") { + attr_map.emplace("beta", "fuse_alpha"); + } else if (act_type == "relu6") { + attr_map.emplace("threshold", "fuse_alpha"); + } else if (act_type == "hard_sigmoid") { + attr_map.emplace("slope", "fuse_alpha"); + attr_map.emplace("offset", "fuse_beta"); + } else if (act_type == "clip") { + attr_map.emplace("min", "fuse_alpha"); + attr_map.emplace("max", "fuse_beta"); + } else { + attr_map.emplace("alpha", "fuse_alpha"); + attr_map.emplace("beta", "fuse_beta"); + } + return attr_map; +} + +inline void SetActivationAttrs(paddle::framework::OpDesc* fused_op, + paddle::framework::OpDesc* act_op, + const std::string& act_type) { + if (fused_op->HasAttr("use_mkldnn")) { + PADDLE_ENFORCE(PADDLE_GET_CONST(bool, fused_op->GetAttr("use_mkldnn")), + phi::errors::PreconditionNotMet( + "oneDNN activation fuses require use_mkldnn=True")); + } + fused_op->SetAttr("use_mkldnn", true); + + auto attr_map = GetAttributeMap(act_type); + for (const auto& attr : attr_map) { + if (act_op->HasAttr(attr.first)) { + fused_op->SetAttr(attr.second, act_op->GetAttr(attr.first)); + } + } + + if (act_type == "gelu" && act_op->HasAttr("approximate")) { + std::string gelu_act_type = + PADDLE_GET_CONST(bool, act_op->GetAttr("approximate")) ? 
"gelu_tanh" + : "gelu_erf"; + fused_op->SetAttr("fuse_activation", gelu_act_type); + } else { + fused_op->SetAttr("fuse_activation", act_type); + } +} + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc index 2db957d84da9272627924359025f9b7a04acd91b..f905df3e53cce440ec62a9183112c982c10aee19 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc @@ -14,8 +14,8 @@ #include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { @@ -25,7 +25,7 @@ namespace ir { using string::PrettyLogDetail; void ConvActivationMkldnnFusePass::ApplyImpl(Graph* graph) const { - auto act_types = phi::funcs::GetSupportedActivations(); + auto act_types = GetSupportedActivations(); std::vector conv_types = {"fused_conv2d", "conv2d"}; for (auto& act_type : act_types) { @@ -40,7 +40,7 @@ void ConvActivationMkldnnFusePass::FuseConvAct(Graph* graph, const std::string& conv_type, std::string& act_type) const { PADDLE_ENFORCE_NOT_NULL( - graph, platform::errors::InvalidArgument("Graph cannot be nullptr.")); + graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); FusePassBase::Init(conv_type + "_" + act_type + "_mkldnn_fuse_pass", graph); GraphPatternDetector gpd; @@ -62,28 +62,13 @@ void ConvActivationMkldnnFusePass::FuseConvAct(Graph* graph, GET_IR_NODE_FROM_SUBGRAPH(activation_out, activation_out, conv_act_pattern); OpDesc* conv_op = conv->Op(); - OpDesc* act_op = activation->Op(); if (conv_op->Type() == "conv2d") { conv_op->SetType("fused_conv2d"); } - auto attr_map = phi::funcs::GetAttributeMap(act_type); - for (const auto& attrs : attr_map) { - if (act_op->HasAttr(attrs.first)) { - conv_op->SetAttr(attrs.second, act_op->GetAttr(attrs.first)); - } - } + SetActivationAttrs(conv_op, activation->Op(), act_type); - if (act_type == "gelu" && activation->Op()->HasAttr("approximate")) { - act_type = - PADDLE_GET_CONST(bool, activation->Op()->GetAttr("approximate")) - ? 
"gelu_tanh" - : "gelu_erf"; - conv_op->SetAttr("fuse_alpha", 0.0f); - conv_op->SetAttr("fuse_beta", 0.0f); - } - conv_op->SetAttr("fuse_activation", act_type); conv_op->SetOutput("Output", {activation_out->Name()}); IR_NODE_LINK_TO(conv, activation_out); @@ -105,7 +90,7 @@ void ConvActivationMkldnnFusePass::FuseConvAct(Graph* graph, void ConvActivationMkldnnFusePass::FuseConvConcatAct( Graph* graph, std::string& act_type) const { PADDLE_ENFORCE_NOT_NULL( - graph, platform::errors::InvalidArgument("Graph cannot be nullptr.")); + graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); FusePassBase::Init("conv2d_concat_" + act_type + "_mkldnn_fuse_pass", graph); GraphPatternDetector gpd; @@ -137,13 +122,13 @@ void ConvActivationMkldnnFusePass::FuseConvConcatAct( return; } - bool is_not_conv_mkldnn = + bool is_not_conv_onednn = !(prev_op_nodes[0]->Op()->GetAttrIfExists("use_mkldnn")); if ((prev_op_nodes[0]->Op()->Type() != "conv2d" && prev_op_nodes[0]->Op()->Type() != "fused_conv2d") || - is_not_conv_mkldnn) { - LOG(WARNING) << "This fuse pass supports only conv2d(mkldnn) | " - "fused_conv2d(mkldnn) + activation."; + is_not_conv_onednn) { + LOG(WARNING) << "This fuse pass supports only conv2d(oneDNN) | " + "fused_conv2d(oneDNN) + activation."; return; } } @@ -153,23 +138,8 @@ void ConvActivationMkldnnFusePass::FuseConvConcatAct( if (conv_op->Type() == "conv2d") { conv_op->SetType("fused_conv2d"); } - OpDesc* act_op = activation_op->Op(); - auto attr_map = phi::funcs::GetAttributeMap(act_type); - for (const auto& attrs : attr_map) { - if (act_op->HasAttr(attrs.first)) { - conv_op->SetAttr(attrs.second, act_op->GetAttr(attrs.first)); - } - } - - if (act_type == "gelu" && act_op->HasAttr("approximate")) { - act_type = PADDLE_GET_CONST(bool, act_op->GetAttr("approximate")) - ? 
"gelu_tanh" - : "gelu_erf"; - conv_op->SetAttr("fuse_alpha", 0.0f); - conv_op->SetAttr("fuse_beta", 0.0f); - } - conv_op->SetAttr("fuse_activation", act_type); + SetActivationAttrs(conv_op, activation_op->Op(), act_type); } concat_op->Op()->SetOutput("Out", {activation_out->Name()}); diff --git a/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc index 618b6993729f54aafb229973c0ec7fc1f381ed87..4b3f6a95d6dd17d67bb31b35551dfdbc3d965eca 100644 --- a/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc @@ -15,8 +15,8 @@ #include "paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" @@ -27,7 +27,7 @@ namespace ir { using string::PrettyLogDetail; void ElementwiseActivationOneDNNPass::ApplyImpl(Graph *graph) const { - auto act_types = phi::funcs::GetSupportedActivations(); + auto act_types = GetSupportedActivations(); std::vector elt_types = { "elementwise_add", "elementwise_sub", "elementwise_mul"}; @@ -42,7 +42,7 @@ void ElementwiseActivationOneDNNPass::FuseElementwiseAct( const std::string &elt_type, const std::string &act_type) const { PADDLE_ENFORCE_NOT_NULL( - graph, platform::errors::InvalidArgument("Graph cannot be nullptr.")); + graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); FusePassBase::Init(elt_type + "_" + act_type + "_mkldnn_fuse_pass", graph); GraphPatternDetector gpd; @@ -62,35 +62,8 @@ void ElementwiseActivationOneDNNPass::FuseElementwiseAct( GET_IR_NODE_FROM_SUBGRAPH( activation_out, activation_out, elementwise_act_pattern); - auto *elementwise_op = elementwise->Op(); - - if (elementwise_op->HasAttr("use_mkldnn")) { - const std::string wo_elt_type = - "The " + elt_type; // Workaround for PP error message checking. 
- PADDLE_ENFORCE_EQ( - PADDLE_GET_CONST(bool, elementwise_op->GetAttr("use_mkldnn")), - true, - platform::errors::PreconditionNotMet( - wo_elt_type + "+Act fusion may happen only when oneDNN library " - "is used.")); - } - - auto *activation_op = activation->Op(); - auto attr_map = phi::funcs::GetAttributeMap(act_type); - for (const auto &attr : attr_map) { - if (activation_op->HasAttr(attr.first)) { - elementwise_op->SetAttr(attr.second, - activation_op->GetAttr(attr.first)); - } - } - - if (act_type == "gelu" && activation_op->HasAttr("approximate") && - PADDLE_GET_CONST(bool, activation_op->GetAttr("approximate"))) - elementwise_op->SetAttr("fuse_activation", std::string("gelu_tanh")); - else - elementwise_op->SetAttr("fuse_activation", act_type); - - elementwise_op->SetOutput("Out", {activation_out->Name()}); + SetActivationAttrs(elementwise->Op(), activation->Op(), act_type); + elementwise->Op()->SetOutput("Out", {activation_out->Name()}); IR_OP_VAR_LINK(elementwise, activation_out); GraphSafeRemoveNodes(g, {activation, elementwise_out}); diff --git a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc index 60ab407f00c5af29cb831437bda5d7554ba23321..d007ef16d33ec2df3218efba74a73ed37d370dd5 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc @@ -14,8 +14,8 @@ #include "paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { @@ -25,7 +25,7 @@ namespace ir { using string::PrettyLogDetail; void FuseFCActOneDNNPass::ApplyImpl(Graph *graph) const { - auto act_types = phi::funcs::GetSupportedActivations(); + auto act_types = GetSupportedActivations(); for (auto act_type : act_types) FuseFCAct(graph, act_type); } @@ -33,7 +33,7 @@ void FuseFCActOneDNNPass::ApplyImpl(Graph *graph) const { void FuseFCActOneDNNPass::FuseFCAct(Graph *graph, const std::string &act_type) const { PADDLE_ENFORCE_NOT_NULL( - graph, platform::errors::InvalidArgument("Graph cannot be nullptr.")); + graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); FusePassBase::Init("fc_" + act_type + "_mkldnn_fuse_pass", graph); GraphPatternDetector gpd; @@ -50,35 +50,8 @@ void FuseFCActOneDNNPass::FuseFCAct(Graph *graph, GET_IR_NODE_FROM_SUBGRAPH(act, activation, fc_act_pattern); GET_IR_NODE_FROM_SUBGRAPH(act_out, activation_out, fc_act_pattern); - auto *fc_op = fc->Op(); - auto *act_op = act->Op(); - - if (fc_op->HasAttr("use_mkldnn")) { - PADDLE_ENFORCE( - PADDLE_GET_CONST(bool, fc_op->GetAttr("use_mkldnn")), - platform::errors::PreconditionNotMet( - "The FC+Act fusion may happen only when oneDNN library " - "is used.")); - } - - auto attr_map = phi::funcs::GetAttributeMap(act_type); - for (const auto &attr : attr_map) { - if (act_op->HasAttr(attr.first)) { - fc_op->SetAttr(attr.second, act_op->GetAttr(attr.first)); - } - } - - if (act_type == "gelu" && act_op->HasAttr("approximate")) { - std::string gelu_act_type = - PADDLE_GET_CONST(bool, act_op->GetAttr("approximate")) ? 
"gelu_tanh" - : "gelu_erf"; - fc_op->SetAttr("fuse_activation", gelu_act_type); - } else { - fc_op->SetAttr("fuse_activation", act_type); - } - - fc_op->SetAttr("use_mkldnn", true); - fc_op->SetOutput("Out", {act_out->Name()}); + SetActivationAttrs(fc->Op(), act->Op(), act_type); + fc->Op()->SetOutput("Out", {act_out->Name()}); IR_OP_VAR_LINK(fc, act_out); GraphSafeRemoveNodes(g, {act, fc_out}); diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc index 07a608c5a2b4a8e48830281727e288bbc2c9d5dc..50db74e46d1d6929b84b9fb89b11f48c485a8e25 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc @@ -14,8 +14,8 @@ #include "paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { @@ -25,7 +25,7 @@ namespace ir { using string::PrettyLogDetail; void MatmulActivationMkldnnFusePass::ApplyImpl(Graph* graph) const { - auto act_types = phi::funcs::GetSupportedActivations(); + auto act_types = GetSupportedActivations(); auto matmul_types = {"matmul", "matmul_v2"}; for (const auto& matmul_type : matmul_types) @@ -37,7 +37,7 @@ void MatmulActivationMkldnnFusePass::ApplyImpl(Graph* graph) const { void MatmulActivationMkldnnFusePass::FuseMatmulAct( Graph* graph, const std::string& matmul_type, std::string& act_type) const { PADDLE_ENFORCE_NOT_NULL( - graph, platform::errors::InvalidArgument("Graph cannot be nullptr.")); + graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); FusePassBase::Init(matmul_type + "_" + act_type + "_mkldnn_fuse_pass", graph); GraphPatternDetector gpd; @@ -61,24 +61,8 @@ void MatmulActivationMkldnnFusePass::FuseMatmulAct( GET_IR_NODE_FROM_SUBGRAPH( activation_out, activation_out, matmul_act_pattern); - OpDesc* matmul_op = matmul->Op(); - OpDesc* act_op = activation->Op(); - - auto attr_map = phi::funcs::GetAttributeMap(act_type); - for (const auto& attrs : attr_map) { - if (act_op->HasAttr(attrs.first)) { - matmul_op->SetAttr(attrs.second, act_op->GetAttr(attrs.first)); - } - } - - if (act_type == "gelu" && activation->Op()->HasAttr("approximate")) { - act_type = - PADDLE_GET_CONST(bool, activation->Op()->GetAttr("approximate")) - ? 
"gelu_tanh" - : "gelu_erf"; - } - matmul_op->SetAttr("fuse_activation", act_type); - matmul_op->SetOutput("Out", {activation_out->Name()}); + SetActivationAttrs(matmul->Op(), activation->Op(), act_type); + matmul->Op()->SetOutput("Out", {activation_out->Name()}); IR_NODE_LINK_TO(matmul, activation_out); GraphSafeRemoveNodes(graph, {activation, matmul_out}); diff --git a/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc index a4e74bb376daf7fdab322fac674e570e8e30b1ae..0954414bee1906e990966aa9e200f5cb510c7c22 100644 --- a/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc @@ -15,8 +15,8 @@ #include "paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/enforce.h" #include "paddle/utils/string/pretty_log.h" @@ -27,7 +27,7 @@ namespace ir { using string::PrettyLogDetail; void SoftplusActivationOneDNNPass::ApplyImpl(Graph *graph) const { - auto act_types = phi::funcs::GetSupportedActivations(); + auto act_types = GetSupportedActivations(); // Currently softplus can't be fused with hard_sigmoid act_types.erase( @@ -42,7 +42,7 @@ void SoftplusActivationOneDNNPass::ApplyImpl(Graph *graph) const { void SoftplusActivationOneDNNPass::FuseSoftplusActivation( Graph *graph, const std::string &act_type) const { PADDLE_ENFORCE_NOT_NULL( - graph, platform::errors::InvalidArgument("Graph cannot be nullptr.")); + graph, phi::errors::InvalidArgument("Graph cannot be nullptr.")); FusePassBase::Init("softplus_activation", graph); GraphPatternDetector gpd; @@ -63,34 +63,8 @@ void SoftplusActivationOneDNNPass::FuseSoftplusActivation( GET_IR_NODE_FROM_SUBGRAPH( activation, activation, softplus_activation_pattern); - auto *softplus_op = softplus->Op(); - - if (softplus_op->HasAttr("use_mkldnn")) { - PADDLE_ENFORCE_EQ( - PADDLE_GET_CONST(bool, softplus_op->GetAttr("use_mkldnn")), - true, - platform::errors::PreconditionNotMet("The softplus + activation " - "fusion may happen only when " - "oneDNN library is used.")); - } - - auto *activation_op = activation->Op(); - auto attr_map = phi::funcs::GetAttributeMap(act_type); - for (const auto &attr : attr_map) { - if (activation_op->HasAttr(attr.first)) { - softplus_op->SetAttr(attr.second, activation_op->GetAttr(attr.first)); - } - } - - if (act_type == "gelu" && activation_op->HasAttr("approximate") && - PADDLE_GET_CONST(bool, activation_op->GetAttr("approximate"))) - softplus_op->SetAttr("fuse_activation", std::string("gelu_tanh")); - else - softplus_op->SetAttr("fuse_activation", act_type); - - softplus_op->SetAttr("use_mkldnn", true); - - softplus_op->SetOutput("Out", {activation_out->Name()}); + SetActivationAttrs(softplus->Op(), activation->Op(), act_type); + softplus->Op()->SetOutput("Out", {activation_out->Name()}); IR_OP_VAR_LINK(softplus, activation_out); GraphSafeRemoveNodes(g, {activation, softplus_out}); diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc index efdeaf8d34efe3ac676d15418f9e71272f347029..2d7a95da4201a67ddab677f8704668c1839f0fd6 100644 --- 
a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc @@ -144,8 +144,6 @@ std::unordered_set OpTransInfo::GetDenyVarNames( const auto& arg_names = desc->Input(param_name); for (const auto& arg_name : arg_names) { deny_var_set.insert(arg_name); - VLOG(4) << "deny param [" << param_name << "]'s argument name" - << " is [" << arg_name << "]."; } } @@ -153,8 +151,6 @@ std::unordered_set OpTransInfo::GetDenyVarNames( const auto& arg_names = desc->Output(param_name); for (const auto& arg_name : arg_names) { deny_var_set.insert(arg_name); - VLOG(4) << "deny param [" << param_name << "]'s argument name" - << " is [" << arg_name << "]."; } } } @@ -166,48 +162,25 @@ std::unordered_set OpTransInfo::GetDenyVarNames( return deny_var_set; } -std::unordered_set OpTransInfo::GetIgnoreInplaceVarNames( - const OpDesc& op_desc) const { - if (!ignore_inplace_param_cond_.count(op_desc.Type())) { - return {}; - } +std::unordered_set OpTransInfo::GetInplaceVarNames( + const GraphNodeSet& cluster_inputs, const GraphNodeSet& cluster_outputs) { + std::unordered_set all_inputs, all_outputs; - const auto& ignore_inplace_names = - ignore_inplace_param_cond_.at(op_desc.Type()); - VLOG(4) << "We found ignore inplace param " - << GetDebugInfo(ignore_inplace_names) << " in op [" << op_desc.Type() - << "]."; - - std::unordered_set ignore_inplace_set; - for (const auto& param_name : ignore_inplace_names) { - if (op_desc.HasOutput(param_name)) { - const auto& arg_names = op_desc.Output(param_name); - ignore_inplace_set.insert(arg_names.begin(), arg_names.end()); - } + for (auto* var : cluster_inputs) { + all_inputs.insert(var->Name()); + } + for (auto* var : cluster_outputs) { + all_outputs.insert(var->Name()); } - VLOG(4) << "All ignore inplace var names are " - << GetDebugInfo(ignore_inplace_set); - - return ignore_inplace_set; -} - -bool OpTransInfo::IsInplaceOp( - const OpDesc& op_desc, - const std::unordered_set& deny_var_names) const { - const auto& ignore_inplace_set = GetIgnoreInplaceVarNames(op_desc); - - auto inputs = op_desc.InputArgumentNames(); - std::unordered_set input_set(inputs.begin(), inputs.end()); - for (auto& name : op_desc.OutputArgumentNames()) { - if (input_set.count(name) > 0 && !deny_var_names.count(name) && - !ignore_inplace_set.count(name)) { - VLOG(4) << "The argument " << name << " in op " << op_desc.Type() - << " is a inplace op, skip!"; - return true; + std::unordered_set inplace_var_set; + for (const auto& var_name : all_inputs) { + if (all_outputs.count(var_name)) { + inplace_var_set.insert(var_name); } } - return false; + + return inplace_var_set; } namespace { @@ -503,6 +476,14 @@ std::unique_ptr CreateNewSubGraph(const GraphNodeSet& cluster, // initialize empty map for kMemOptVarInfoFromMainGraph attribute, // it will be filled on the share_mem_opt_info_to_subgraph pass subgraph->GetOrInit(kMemOptVarInfoFromMainGraph); + + auto inplace_var_names = std::make_unique>( + OpTransInfo::GetInplaceVarNames(cluster_inputs, cluster_outputs)); + VLOG_IF(4, !inplace_var_names->empty()) + << "Inplace var in cluster are: " << GetDebugInfo(*inplace_var_names); + subgraph->Set>(kInplaceVarNames, + inplace_var_names.release()); + return subgraph; } @@ -594,7 +575,6 @@ void AddCinnOpToGraph(const GraphNodeSet& cluster, const GraphNodeSet& cluster_inputs, const GraphNodeSet& cluster_outputs, int64_t compilation_key, - const std::unordered_set& deny_var_set, Graph* graph) { // Add the cinn launch op framework::OpDesc cinn_op_desc; @@ -615,6 
+595,7 @@ void AddCinnOpToGraph(const GraphNodeSet& cluster, cinn_op_desc.SetAttr(operators::kCompilationKey, compilation_key); cinn_op_desc.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(), ExtractOpRole(cluster)); + cinn_op_desc.Flush(); auto* cinn_op_node = graph->CreateOpNode(&cinn_op_desc); // Add new links from or to the cinn launch op node @@ -639,21 +620,15 @@ void RemoveSubGraphFromGraph(const GraphNodeSet& cluster, // kCinnLaunchOp, and inputs ares cluster_inputs and outputs are // cluster_outputs. // Meanwhile, move all links of cluster to the cinn op. -void ReplaceSubGraphWithCinnOpNode( - const GraphNodeSet& cluster, - const GraphNodeSet& cluster_inputs, - const GraphNodeSet& cluster_outputs, - const GraphNodeSet& cluster_internals, - int64_t compilation_key, - const std::unordered_set& deny_var_set, - Graph* graph) { +void ReplaceSubGraphWithCinnOpNode(const GraphNodeSet& cluster, + const GraphNodeSet& cluster_inputs, + const GraphNodeSet& cluster_outputs, + const GraphNodeSet& cluster_internals, + int64_t compilation_key, + Graph* graph) { // Add the cinn op node whose name is "kCinnLaunchOp" into graph - AddCinnOpToGraph(cluster, - cluster_inputs, - cluster_outputs, - compilation_key, - deny_var_set, - graph); + AddCinnOpToGraph( + cluster, cluster_inputs, cluster_outputs, compilation_key, graph); // Remove the cinn subgraph from graph RemoveSubGraphFromGraph(cluster, cluster_internals, graph); } @@ -667,9 +642,7 @@ void SearchAllSubgraphs(Graph* graph, bool is_inference_stage) { auto deny_ops = StringSplit(FLAGS_deny_cinn_ops, kDelim); OpTransInfo trans_info; - const auto& deny_var_set = trans_info.GetDenyVarNames(graph->Nodes()); - auto teller = [&allow_ops, &deny_ops, &trans_info, &deny_var_set]( - const Node* node) { + auto teller = [&allow_ops, &deny_ops, &trans_info](const Node* node) { const auto& node_name = node->Name(); bool registered = ::cinn::frontend::OpMapperRegistry::Global()->Find( node_name) != nullptr; @@ -679,10 +652,9 @@ void SearchAllSubgraphs(Graph* graph, bool is_inference_stage) { is_dynamic = trans_info.dynamic_op_cond().at(node_name)(*node); } - bool is_support = - registered && !trans_info.default_deny_ops().count(node_name) && - !is_dynamic && - (node->IsOp() && !trans_info.IsInplaceOp(*node->Op(), deny_var_set)); + bool is_support = registered && + !trans_info.default_deny_ops().count(node_name) && + !is_dynamic; // if the op type is registered in CINN and allow_ops is not empty, return // true only when it is in allow_ops if (!allow_ops.empty()) { @@ -714,19 +686,23 @@ void SearchAllSubgraphs(Graph* graph, bool is_inference_stage) { return res; }; - std::unordered_set skip_gc_var_names; + std::unordered_set all_skip_gc_vars; if (graph->Has(kSkipGcVarNames)) { - skip_gc_var_names = + all_skip_gc_vars = graph->Get>(kSkipGcVarNames); + VLOG_IF(4, !all_skip_gc_vars.empty()) + << "All skip gc var names are: " << GetDebugInfo(all_skip_gc_vars); } + const auto& deny_var_set = trans_info.GetDenyVarNames(graph->Nodes()); + VLOG_IF(4, !deny_var_set.empty()) + << "All deny var names are: " << GetDebugInfo(deny_var_set); + auto* cinn_compiler = CinnCompiler::GetInstance(); for (const auto& node_vec : clusters) { // Classify var node to inputs, outputs, and internals. 
GraphNodeSet cluster_set(node_vec.begin(), node_vec.end()); - auto deny_var_set = trans_info.GetDenyVarNames(cluster_set); - GraphNodeSet cluster_inputs, cluster_outputs, cluster_internals; AnalyseClusterVariables(cluster_set, deny_var_set, @@ -734,7 +710,7 @@ void SearchAllSubgraphs(Graph* graph, bool is_inference_stage) { &cluster_outputs, &cluster_internals, is_inference_stage, - skip_gc_var_names); + all_skip_gc_vars); VLOG(4) << "Cluster Ops: " << cluster_debug_info(cluster_set); VLOG(4) << "Cluster input vars: " << cluster_debug_info(cluster_inputs); @@ -747,8 +723,6 @@ void SearchAllSubgraphs(Graph* graph, bool is_inference_stage) { cluster_set, cluster_internals, cluster_inputs, cluster_outputs); // Deliver the kSkipGcVarNames attr (if exists) to the subgraph if (graph->Has(kSkipGcVarNames)) { - const auto& all_skip_gc_vars = - graph->Get>(kSkipGcVarNames); auto& sub_skip_gc_vars = subgraph->GetOrInit>(kSkipGcVarNames); sub_skip_gc_vars = all_skip_gc_vars; @@ -763,7 +737,6 @@ void SearchAllSubgraphs(Graph* graph, bool is_inference_stage) { cluster_outputs, cluster_internals, compilation_key, - deny_var_set, graph); } } diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.h b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.h index 93e5186421725a9546d043804ec1e6b8b36c52bf..1797d07faf5c72d4e784c37e018fae6eabd92063 100644 --- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.h +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.h @@ -39,6 +39,7 @@ constexpr char kOutputVars[] = "OutputVars"; constexpr char kMemOptVarInfoFromMainGraph[] = "mem_opt_var_info_from_main_graph"; constexpr char kSkipGcVarNames[] = "skip_gc_vars"; +constexpr char kInplaceVarNames[] = "InplaceVars"; using Name2VarInfoMap = std::unordered_map GetDenyVarNames( const GraphNodeSet& cluster) const; - std::unordered_set GetIgnoreInplaceVarNames( - const OpDesc& op_desc) const; - - bool IsInplaceOp(const OpDesc& op_desc, - const std::unordered_set& deny_var_names) const; + static std::unordered_set GetInplaceVarNames( + const GraphNodeSet& cluster_inputs, const GraphNodeSet& cluster_outputs); private: DyOpCondT dynamic_op_cond_; @@ -79,9 +77,6 @@ class OpTransInfo { DeParamCondT deny_param_cond_{{"batch_norm", {"ReserveSpace"}}, {"batch_norm_grad", {"ReserveSpace"}}}; - DeParamCondT ignore_inplace_param_cond_{ - {"batch_norm", {"MeanOut", "VarianceOut"}}}; - std::unordered_set default_deny_ops_{"feed", "fetch"}; }; diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc index 94bc1241895ef618437e7903de8a7629080a0d8e..b703ca04f9274eae08e7141d80b9d18c54e367ed 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc @@ -258,17 +258,16 @@ void CinnGraphSymbolization::RunGraph(const OpMapperContext& ctx) const { std::unordered_set CinnGraphSymbolization::GetFetchIds() const { std::unordered_set fetch_names; fetch_names.reserve(fetch_var_names_.size()); - std::for_each( - fetch_var_names_.begin(), - fetch_var_names_.end(), - [this, &fetch_names](const std::string& name) { - PADDLE_ENFORCE_EQ( - var_model_to_program_map_.count(name), - 1, - platform::errors::PreconditionNotMet( - "Cannot find %s in var_model_to_program_map_", name.c_str())); - fetch_names.insert(var_model_to_program_map_.at(name)); - }); + std::for_each(fetch_var_names_.begin(), + fetch_var_names_.end(), + [this, &fetch_names](const std::string& name) { + 
PADDLE_ENFORCE_EQ( + var_map_.count(name), + 1, + platform::errors::PreconditionNotMet( + "Cannot find %s in var_map_", name.c_str())); + fetch_names.insert(var_map_.at(name)->id); + }); return fetch_names; } diff --git a/paddle/fluid/imperative/amp_auto_cast.cc b/paddle/fluid/imperative/amp_auto_cast.cc index 55c15208208085feb9207cac22d105b4ceb96e80..bf428ddf9b7e22f9c83271393c83e7072fef5870 100644 --- a/paddle/fluid/imperative/amp_auto_cast.cc +++ b/paddle/fluid/imperative/amp_auto_cast.cc @@ -337,6 +337,11 @@ NameVarMap AutoCastInputs(const std::string& op_type, pair.first != "X") { continue; } + if ((op_type == "max_pool2d_with_index_grad" || + op_type == "max_pool2d_with_index") && + pair.first == "Mask") { + continue; + } if ((op_type == "fused_attention" || op_type == "fused_feedforward")) { if (pair.first == "LnScale" || pair.first == "LnBias" || @@ -381,6 +386,11 @@ NameVarMap AutoCastInputs(const std::string& op_type, pair.first == "X" && dst_type == framework::proto::VarType::FP32) { continue; } + if ((op_type == "max_pool2d_with_index_grad" || + op_type == "max_pool2d_with_index") && + pair.first != "Mask" && dst_type == framework::proto::VarType::FP32) { + continue; + } if ((op_type == "fused_attention" || op_type == "fused_feedforwad") && dst_type == framework::proto::VarType::FP32) { if (pair.first != "LnScale" && pair.first != "LnBias" && @@ -428,6 +438,11 @@ NameVarMap CastPureFp16Inputs(const std::string& op_type, pair.first != "X") { continue; } + if ((op_type == "max_pool2d_with_index_grad" || + op_type == "max_pool2d_with_index") && + pair.first == "Mask") { + continue; + } if ((op_type == "fused_attention" || op_type == "fused_feedforward")) { if (pair.first == "LnScale" || pair.first == "LnBias" || pair.first == "Ln2Scale" || pair.first == "Ln2Bias" || diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index f4c35b49a0d00760f78f3d22a1dab1ee0ffae010..6ccad994b06a8085228e41007be503e8d99d09e4 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1609,6 +1609,51 @@ std::vector AnalysisPredictor::GetOutputNames() { return output_names; } +std::map> +AnalysisPredictor::GetOutputTensorShape() { + std::map> output_shapes; + std::vector names = GetOutputNames(); + for (std::string name : names) { + auto *var = inference_program_->Block(0).FindVar(name); + PADDLE_ENFORCE_NOT_NULL(var, + platform::errors::PreconditionNotMet( + "Output %s does not exist.", name)); + output_shapes[name] = var->GetShape(); + } + return output_shapes; +} + +std::map +AnalysisPredictor::GetOutputTypes() { + std::map output_type; + std::vector names = GetOutputNames(); + for (const auto &name : names) { + auto *var = inference_program_->Block(0).FindVar(name); + PADDLE_ENFORCE_NOT_NULL( + var, + platform::errors::PreconditionNotMet( + "Output %s does not exist inference_program_.", name)); + auto dtype = var->GetDataType(); + if (dtype == paddle::framework::proto::VarType::FP32) { + output_type[name] = paddle_infer::DataType::FLOAT32; + } else if (dtype == paddle::framework::proto::VarType::FP16) { + output_type[name] = paddle_infer::DataType::FLOAT16; + } else if (dtype == paddle::framework::proto::VarType::INT64) { + output_type[name] = paddle_infer::DataType::INT64; + } else if (dtype == paddle::framework::proto::VarType::INT32) { + output_type[name] = paddle_infer::DataType::INT32; + } else if (dtype == paddle::framework::proto::VarType::UINT8) { + output_type[name] = 
paddle_infer::DataType::UINT8; + } else if (dtype == paddle::framework::proto::VarType::INT8) { + output_type[name] = paddle_infer::DataType::INT8; + } else { + PADDLE_THROW(paddle::platform::errors::Unimplemented( + "Unsupported data type `%s` when get output dtype ", dtype)); + } + } + return output_type; +} + std::unique_ptr AnalysisPredictor::GetInputTensor( const std::string &name) { framework::Scope *scope; @@ -2477,6 +2522,10 @@ std::vector Predictor::GetInputNames() { return predictor_->GetInputNames(); } +std::map> Predictor::GetInputTensorShape() { + return predictor_->GetInputTensorShape(); +} + std::map Predictor::GetInputTypes() { return predictor_->GetInputTypes(); } @@ -2493,6 +2542,14 @@ std::unique_ptr Predictor::GetOutputHandle(const std::string &name) { return predictor_->GetOutputTensor(name); } +std::map> Predictor::GetOutputTensorShape() { + return predictor_->GetOutputTensorShape(); +} + +std::map Predictor::GetOutputTypes() { + return predictor_->GetOutputTypes(); +} + bool Predictor::Run() { return predictor_->ZeroCopyRun(); } std::unique_ptr Predictor::Clone(void *stream) { diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 95a58d856f3e7f12e6372d9c501d6629b9aef15a..5a578a9b94fcb750379225d81edd4e98ca7b3200 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -191,6 +191,18 @@ class AnalysisPredictor : public PaddlePredictor { /// \return the map of input names and type /// std::map GetInputTypes() override; + /// + /// \brief Get all output names and their corresponding shapes + /// + /// \return the map of output names and shapes + /// + std::map> GetOutputTensorShape() override; + /// + /// \brief Get all output names and their corresponding type + /// + /// \return the map of output names and type + /// + std::map GetOutputTypes() override; /// /// \brief Run the prediction engine diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc index 627b6fba02313ccd5d4179907d5351b58a69b49e..3b462cc941e710bf3bb207abb990b4cbdc1b3e34 100644 --- a/paddle/fluid/inference/api/analysis_predictor_tester.cc +++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc @@ -106,6 +106,8 @@ TEST(AnalysisPredictor, analysis_on) { ASSERT_EQ(predictor->scope_->parent(), nullptr); ASSERT_EQ(predictor->sub_scope_->parent(), predictor->scope_.get()); ASSERT_EQ(predictor->GetInputTypes().size(), 4UL); + ASSERT_EQ(predictor->GetOutputTypes().size(), 1UL); + ASSERT_EQ(predictor->GetOutputTensorShape().size(), 1UL); // 2. Dummy Input Data int64_t data[4] = {1, 2, 3, 4}; PaddleTensor tensor; @@ -430,6 +432,8 @@ TEST(Predictor, Run) { auto predictor = CreatePredictor(config); ASSERT_EQ(predictor->GetInputTypes().size(), 4UL); + ASSERT_EQ(predictor->GetOutputTypes().size(), 1UL); + ASSERT_EQ(predictor->GetOutputTensorShape().size(), 1UL); auto w0 = predictor->GetInputHandle("firstw"); auto w1 = predictor->GetInputHandle("secondw"); diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index ff1ec1eba30259b44c252e3eb38d7008697f4058..e83c1a9f9444c90944162beaeeeaa6302593390a 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -243,6 +243,19 @@ class PD_INFER_DECL PaddlePredictor { /// \return Output tensor names. 
virtual std::vector GetOutputNames() { return {}; } + /// \brief Get the output shape of the model. + /// \return A map contains all the output names and shape defined in the + /// model. + virtual std::map> GetOutputTensorShape() { + return {}; + } + + /// \brief Get the output type of the model. + /// \return A map contains all the output names and type defined in the model. + virtual std::map GetOutputTypes() { + return {}; + } + /// \brief Get the input ZeroCopyTensor by name. /// Be inherited by AnalysisPredictor, Only used in ZeroCopy scenarios. /// The name is obtained from the GetInputNames() interface. diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index 1a52c011b2a806490cf49053dad6fc122069377b..d7f15e0529894f7991c807fa7519e7d28ae6a4c9 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -92,6 +92,13 @@ class PD_INFER_DECL Predictor { /// explicit Predictor(const Config& config); + /// + /// \brief Get all input names and their corresponding shapes + /// + /// \return the map of input names and shape + /// + std::map> GetInputTensorShape(); + /// /// \brief Get all input names and their corresponding type /// @@ -136,6 +143,20 @@ class PD_INFER_DECL Predictor { /// std::unique_ptr GetOutputHandle(const std::string& name); + /// + /// \brief Get all output names and their corresponding shapes + /// + /// \return the map of output names and shape + /// + std::map> GetOutputTensorShape(); + + /// + /// \brief Get all output names and their corresponding type + /// + /// \return the map of output names and type + /// + std::map GetOutputTypes(); + /// /// \brief Clone to get the new predictor. thread safe. /// diff --git a/paddle/fluid/inference/capi_exp/pd_config.cc b/paddle/fluid/inference/capi_exp/pd_config.cc index 6ff88beb70225e6a002c70f1d0d1a79523a00698..dd0979274f75daeeff45326af2deacd859d8d5d1 100644 --- a/paddle/fluid/inference/capi_exp/pd_config.cc +++ b/paddle/fluid/inference/capi_exp/pd_config.cc @@ -55,8 +55,9 @@ __pd_give PD_Config* PD_ConfigCreate() { } void PD_ConfigDestroy(__pd_take PD_Config* pd_config) { - CHECK_AND_CONVERT_PD_CONFIG; - delete reinterpret_cast(config); + if (pd_config != NULL) { + delete reinterpret_cast(pd_config); + } } void PD_ConfigSetModel(__pd_keep PD_Config* pd_config, @@ -116,9 +117,12 @@ PD_Bool PD_ConfigUseFcPadding(__pd_keep PD_Config* pd_config) { void PD_ConfigEnableUseGpu(__pd_keep PD_Config* pd_config, uint64_t memory_pool_init_size_mb, - int32_t device_id) { + int32_t device_id, + PD_PrecisionType precision_mode) { CHECK_AND_CONVERT_PD_CONFIG; - config->EnableUseGpu(memory_pool_init_size_mb, device_id); + config->EnableUseGpu(memory_pool_init_size_mb, + device_id, + ConvertToCxxPrecisionType(precision_mode)); } void PD_ConfigDisableGpu(__pd_keep PD_Config* pd_config) { CHECK_AND_CONVERT_PD_CONFIG; @@ -427,6 +431,14 @@ void PD_ConfigSetBfloat16Op(__pd_keep PD_Config* pd_config, } config->SetBfloat16Op(std::move(op_names)); } +void PD_ConfigEnableMkldnnInt8(__pd_keep PD_Config* pd_config) { + CHECK_AND_CONVERT_PD_CONFIG; + config->EnableMkldnnInt8(); +} +PD_Bool PD_ConfigMkldnnInt8Enabled(__pd_keep PD_Config* pd_config) { + CHECK_AND_CONVERT_PD_CONFIG; + return config->mkldnn_int8_enabled(); +} PD_Bool PD_ConfigThreadLocalStreamEnabled(__pd_keep PD_Config* pd_config) { CHECK_AND_CONVERT_PD_CONFIG; return config->thread_local_stream_enabled(); @@ -484,6 +496,10 @@ void 
PD_ConfigEnableGpuMultiStream(__pd_keep PD_Config* pd_config) { CHECK_AND_CONVERT_PD_CONFIG; config->EnableGpuMultiStream(); } +void PD_ConfigSetExecStream(__pd_keep PD_Config* pd_config, void* stream) { + CHECK_AND_CONVERT_PD_CONFIG; + return config->SetExecStream(stream); +} void PD_ConfigPartiallyRelease(__pd_take PD_Config* pd_config) { CHECK_AND_CONVERT_PD_CONFIG; config->PartiallyRelease(); diff --git a/paddle/fluid/inference/capi_exp/pd_config.h b/paddle/fluid/inference/capi_exp/pd_config.h index feb1d5724438aa18cea2e8a83f844935eec09e5b..19e1a1c139d4c27d7832d646bb2197fd768e9968 100644 --- a/paddle/fluid/inference/capi_exp/pd_config.h +++ b/paddle/fluid/inference/capi_exp/pd_config.h @@ -132,11 +132,13 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseFcPadding( /// \param[in] memory_pool_init_size_mb initial size of the GPU memory pool in /// MB. /// \param[in] device_id device_id the GPU card to use. +/// \param[in] precision_mode the precision used in Paddle-GPU inference. /// PADDLE_CAPI_EXPORT extern void PD_ConfigEnableUseGpu( __pd_keep PD_Config* pd_config, uint64_t memory_pool_init_size_mb, - int32_t device_id); + int32_t device_id, + PD_PrecisionType precision_mode); /// /// \brief Turn off GPU. /// @@ -607,6 +609,22 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnBfloat16Enabled( PADDLE_CAPI_EXPORT extern void PD_ConfigSetBfloat16Op( __pd_keep PD_Config* pd_config, size_t ops_num, const char** op_list); /// +/// \brief Turn on MKLDNN int8. +/// +/// \param[in] pd_config config +/// +PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMkldnnInt8( + __pd_keep PD_Config* pd_config); +/// +/// \brief A boolean state telling whether to use the MKLDNN int8. +/// +/// \param[in] pd_config config +/// \return Whether to use the MKLDNN int8. +/// +PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnInt8Enabled( + __pd_keep PD_Config* pd_config); + +/// /// \brief Enable the GPU multi-computing stream feature. /// NOTE: The current behavior of this interface is to bind the computation /// stream to the thread, and this behavior may be changed in the future. @@ -625,6 +643,12 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableGpuMultiStream( PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigThreadLocalStreamEnabled( __pd_keep PD_Config* pd_config); /// +/// \brief Set execution stream. If not set, a stream will be created +/// internally. +/// +PADDLE_CAPI_EXPORT extern void PD_ConfigSetExecStream( + __pd_keep PD_Config* pd_config, void* stream); +/// /// \brief Specify the memory buffer of program and parameter. /// Used when model and params are loaded directly from memory. 
/// diff --git a/paddle/fluid/inference/capi_exp/pd_predictor.cc b/paddle/fluid/inference/capi_exp/pd_predictor.cc index ef6447f7444c9cf2685be2fd230022dbee8f92f7..f0a16fee611dad0d5e5dd1037a846eeda34a184d 100644 --- a/paddle/fluid/inference/capi_exp/pd_predictor.cc +++ b/paddle/fluid/inference/capi_exp/pd_predictor.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/inference/capi_exp/pd_predictor.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/inference/capi_exp/pd_config.h" #include "paddle/fluid/inference/capi_exp/pd_types.h" #include "paddle/fluid/inference/capi_exp/pd_utils.h" #include "paddle/fluid/inference/capi_exp/types_internal.h" @@ -38,7 +39,6 @@ __pd_give PD_Predictor* PD_PredictorCreate(__pd_take PD_Config* pd_config) { paddle_infer::Config* config = reinterpret_cast(pd_config); pd_predictor->predictor = paddle_infer::CreatePredictor(*config); - delete config; return pd_predictor; } @@ -57,6 +57,30 @@ __pd_give PD_OneDimArrayCstr* PD_PredictorGetInputNames( return paddle_infer::CvtVecToOneDimArrayCstr(names); } +__pd_give PD_IOInfos* PD_PredictorGetInputInfos( + __pd_keep PD_Predictor* pd_predictor) { + CHECK_AND_CONVERT_PD_PREDICTOR; + std::vector names = predictor->GetInputNames(); + std::map> input_shapes = + predictor->GetInputTensorShape(); + std::map input_dtypes = + predictor->GetInputTypes(); + + PD_IOInfos* input_infos = new PD_IOInfos; + input_infos->size = names.size(); + input_infos->io_info = names.empty() ? NULL : new PD_IOInfo*[names.size()]; + for (size_t i = 0; i < names.size(); i++) { + const std::string& name = names[i]; + input_infos->io_info[i] = new PD_IOInfo; + input_infos->io_info[i]->name = paddle_infer::CvtStrToCstr(name); + input_infos->io_info[i]->shape = + paddle_infer::CvtVecToOneDimArrayInt64(input_shapes[name]); + input_infos->io_info[i]->dtype = + paddle_infer::CvtFromCxxDatatype(input_dtypes[name]); + } + return input_infos; +} + __pd_give PD_OneDimArrayCstr* PD_PredictorGetOutputNames( __pd_keep PD_Predictor* pd_predictor) { CHECK_AND_CONVERT_PD_PREDICTOR; @@ -64,6 +88,30 @@ __pd_give PD_OneDimArrayCstr* PD_PredictorGetOutputNames( return paddle_infer::CvtVecToOneDimArrayCstr(names); } +__pd_give PD_IOInfos* PD_PredictorGetOutputInfos( + __pd_keep PD_Predictor* pd_predictor) { + CHECK_AND_CONVERT_PD_PREDICTOR; + std::vector names = predictor->GetOutputNames(); + std::map> output_shapes = + predictor->GetOutputTensorShape(); + std::map output_dtypes = + predictor->GetOutputTypes(); + + PD_IOInfos* output_infos = new PD_IOInfos; + output_infos->size = names.size(); + output_infos->io_info = names.empty() ? 
NULL : new PD_IOInfo*[names.size()]; + for (size_t i = 0; i < names.size(); i++) { + const std::string& name = names[i]; + output_infos->io_info[i] = new PD_IOInfo; + output_infos->io_info[i]->name = paddle_infer::CvtStrToCstr(name); + output_infos->io_info[i]->shape = + paddle_infer::CvtVecToOneDimArrayInt64(output_shapes[name]); + output_infos->io_info[i]->dtype = + paddle_infer::CvtFromCxxDatatype(output_dtypes[name]); + } + return output_infos; +} + size_t PD_PredictorGetInputNum(__pd_keep PD_Predictor* pd_predictor) { CHECK_AND_CONVERT_PD_PREDICTOR; return predictor->GetInputNames().size(); diff --git a/paddle/fluid/inference/capi_exp/pd_predictor.h b/paddle/fluid/inference/capi_exp/pd_predictor.h index 33d5160bc3e0d1b1f14c2e9e34e1885ee8ae4f72..a35defb910070324be0c4e7c11305affb2fcd8e4 100644 --- a/paddle/fluid/inference/capi_exp/pd_predictor.h +++ b/paddle/fluid/inference/capi_exp/pd_predictor.h @@ -30,6 +30,7 @@ typedef struct PD_Predictor PD_Predictor; typedef struct PD_Config PD_Config; typedef struct PD_Tensor PD_Tensor; typedef struct PD_OneDimArrayCstr PD_OneDimArrayCstr; +typedef struct PD_IOInfos PD_IOInfos; #ifdef __cplusplus extern "C" { @@ -60,6 +61,14 @@ PADDLE_CAPI_EXPORT extern __pd_give PD_Predictor* PD_PredictorClone( PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayCstr* PD_PredictorGetInputNames(__pd_keep PD_Predictor* pd_predictor); /// +/// \brief Get the input infos(name/shape/dtype) +/// +/// \param[in] pd_predictor predictor +/// \return input infos(name/shape/dtype) +/// +PADDLE_CAPI_EXPORT extern __pd_give PD_IOInfos* PD_PredictorGetInputInfos( + __pd_keep PD_Predictor* pd_predictor); +/// /// \brief Get the output names /// /// \param[in] pd_predictor predictor @@ -67,7 +76,14 @@ PD_PredictorGetInputNames(__pd_keep PD_Predictor* pd_predictor); /// PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayCstr* PD_PredictorGetOutputNames(__pd_keep PD_Predictor* pd_predictor); - +/// +/// \brief Get the output infos(name/shape/dtype) +/// +/// \param[in] pd_predictor predictor +/// \return output infos(name/shape/dtype) +/// +PADDLE_CAPI_EXPORT extern __pd_give PD_IOInfos* PD_PredictorGetOutputInfos( + __pd_keep PD_Predictor* pd_predictor); /// /// \brief Get the input number /// diff --git a/paddle/fluid/inference/capi_exp/pd_types.h b/paddle/fluid/inference/capi_exp/pd_types.h index 62c54616535cfffb685d51f42ed88d454d9f1e81..b2391d538bcfd8589d29553f1f78a17256af4c60 100644 --- a/paddle/fluid/inference/capi_exp/pd_types.h +++ b/paddle/fluid/inference/capi_exp/pd_types.h @@ -29,6 +29,11 @@ typedef struct PD_OneDimArraySize { size_t* data; } PD_OneDimArraySize; // std::vector +typedef struct PD_OneDimArrayInt64 { + size_t size; + int64_t* data; +} PD_OneDimArrayInt64; // std::vector + typedef struct PD_OneDimArrayCstr { size_t size; char** data; @@ -43,3 +48,14 @@ typedef struct PD_TwoDimArraySize { size_t size; PD_OneDimArraySize** data; } PD_TwoDimArraySize; // std::vector> + +typedef struct PD_IOInfo { + PD_Cstr* name; + PD_OneDimArrayInt64* shape; + PD_DataType dtype; +} PD_IOInfo; // input or output info + +typedef struct PD_IOInfos { + size_t size; + PD_IOInfo** io_info; +} PD_IOInfos; // inputs or outputs info diff --git a/paddle/fluid/inference/capi_exp/pd_utils.cc b/paddle/fluid/inference/capi_exp/pd_utils.cc index 7942a860c4ee898c0f2a402eaae469b95e549ead..b455e79dbd6763c0522f53d314bcbb9c3290882a 100644 --- a/paddle/fluid/inference/capi_exp/pd_utils.cc +++ b/paddle/fluid/inference/capi_exp/pd_utils.cc @@ -11,12 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF 
ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - -#include "paddle/fluid/inference/capi_exp/pd_utils.h" - #include #include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/inference/capi_exp/pd_utils.h" #include "paddle/fluid/inference/capi_exp/utils_internal.h" #include "paddle/fluid/platform/enforce.h" @@ -62,6 +60,7 @@ ONE_DIM_ARRAY_UTILS_FUNC_IMPL(int32_t, Int32, int) ONE_DIM_ARRAY_UTILS_FUNC_IMPL(size_t, Size, size_t) +ONE_DIM_ARRAY_UTILS_FUNC_IMPL(int64_t, Int64, int64_t) #undef ONE_DIM_ARRAY_UTILS_FUNC_IMPL #undef CONVERT_ONE_DIM_ARRAY_TO_VEC @@ -178,6 +177,38 @@ TWO_DIM_ARRAY_UTILS_FUNC_IMPL(size_t, Size, size_t) #undef CONVERT_VEC_TO_TWO_DIM_ARRAY #undef DESTROY_TWO_DIM_ARRAY +#ifdef __cplusplus +extern "C" { +#endif + +void PD_IOInfoDestroy(__pd_take PD_IOInfo* io_info) { + if (io_info != NULL) { + PD_CstrDestroy(io_info->name); + io_info->name = NULL; + PD_OneDimArrayInt64Destroy(io_info->shape); + io_info->shape = NULL; + delete io_info; + } +} + +void PD_IOInfosDestroy(__pd_take PD_IOInfos* io_infos) { + if (io_infos != NULL) { + if (io_infos->size != 0) { + for (size_t index = 0; index < io_infos->size; ++index) { + PD_IOInfoDestroy(io_infos->io_info[index]); + } + io_infos->size = 0; + } + delete[] io_infos->io_info; + io_infos->io_info = NULL; + delete io_infos; + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + namespace paddle_infer { PlaceType CvtToCxxPlaceType(PD_PlaceType place_type) { diff --git a/paddle/fluid/inference/capi_exp/pd_utils.h b/paddle/fluid/inference/capi_exp/pd_utils.h index 8743c58db76c92b620d0a5b82318e43d52b524ee..0b1abcc82f4266b4c905da7d777484306cdad9ef 100644 --- a/paddle/fluid/inference/capi_exp/pd_utils.h +++ b/paddle/fluid/inference/capi_exp/pd_utils.h @@ -41,6 +41,14 @@ extern "C" { PADDLE_CAPI_EXPORT extern void PD_OneDimArrayInt32Destroy( __pd_take PD_OneDimArrayInt32* array); +/// +/// \brief Destroy the PD_OneDimArrayInt64 object pointed to by the pointer. +/// +/// \param[in] array pointer to the PD_OneDimArrayInt64 object. +/// +PADDLE_CAPI_EXPORT extern void PD_OneDimArrayInt64Destroy( + __pd_take PD_OneDimArrayInt64* array); + /// /// \brief Destroy the PD_OneDimArrayCstr object pointed to by the pointer. /// @@ -74,6 +82,21 @@ PADDLE_CAPI_EXPORT extern void PD_TwoDimArraySizeDestroy( /// PADDLE_CAPI_EXPORT extern void PD_CstrDestroy(__pd_take PD_Cstr* cstr); +/// +/// \brief Destroy the PD_IOInfo object pointed to by the pointer. +/// +/// \param[in] cstr pointer to the PD_IOInfo object. +/// +PADDLE_CAPI_EXPORT extern void PD_IOInfoDestroy(__pd_take PD_IOInfo* io_info); + +/// +/// \brief Destroy the PD_IOInfos object pointed to by the pointer. +/// +/// \param[in] cstr pointer to the PD_IOInfos object. +/// +PADDLE_CAPI_EXPORT extern void PD_IOInfosDestroy( + __pd_take PD_IOInfos* io_infos); + #ifdef __cplusplus } // extern "C" #endif diff --git a/paddle/fluid/inference/capi_exp/utils_internal.h b/paddle/fluid/inference/capi_exp/utils_internal.h index 95b16dbd59943bb34a2f0d348b03b2d21cd99b23..e7d73331a6b7472cb7e7dd059607981d434354b1 100644 --- a/paddle/fluid/inference/capi_exp/utils_internal.h +++ b/paddle/fluid/inference/capi_exp/utils_internal.h @@ -44,6 +44,16 @@ namespace paddle_infer { __pd_give PD_OneDimArrayInt32* CvtVecToOneDimArrayInt32( const std::vector& vec); +/// +/// \brief Convert the 'std::vector' object to a 'PD_OneDimArrayInt64' +/// object. +/// +/// \param[in] vec source object. 
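//
// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: walking the new PD_IOInfos
// structure returned by PD_PredictorGetInputInfos and releasing it with
// PD_IOInfosDestroy. The PD_Cstr size/data layout is assumed from the
// existing C API types; error handling is omitted.
// ---------------------------------------------------------------------------
#include <cstdio>

#include "pd_inference_api.h"

void PrintInputInfos(PD_Predictor* predictor) {
  PD_IOInfos* infos = PD_PredictorGetInputInfos(predictor);
  for (size_t i = 0; i < infos->size; ++i) {
    const PD_IOInfo* info = infos->io_info[i];
    std::printf("input %zu: name=%s rank=%zu dtype=%d\n",
                i,
                info->name->data,
                info->shape->size,
                static_cast<int>(info->dtype));
  }
  // Releases every PD_IOInfo together with its name and shape arrays.
  PD_IOInfosDestroy(infos);
}
// ---------------------------------------------------------------------------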
+/// \return target object. +/// +__pd_give PD_OneDimArrayInt64* CvtVecToOneDimArrayInt64( + const std::vector& vec); + /// /// \brief Convert the 'PD_OneDimArrayInt32' object to a 'std::vector' /// object. @@ -54,6 +64,16 @@ __pd_give PD_OneDimArrayInt32* CvtVecToOneDimArrayInt32( std::vector CvtOneDimArrayToVecInt32( __pd_keep const PD_OneDimArrayInt32* array); +/// +/// \brief Convert the 'PD_OneDimArrayInt64' object to a 'std::vector' +/// object. +/// +/// \param[in] array source object. +/// \return target object. +/// +std::vector CvtOneDimArrayToVecInt64( + __pd_keep const PD_OneDimArrayInt64* array); + /// /// \brief Convert the 'std::vector' object to a 'PD_OneDimArraySize' /// object. diff --git a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp index 285feebb90f60e57273019a9d40d546b33c3d958..0d585f938be8c8dd958e36e0d751de33d2f642c4 100644 --- a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp +++ b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp @@ -1,272 +1,273 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "com_baidu_paddle_inference_Config.h" - -#include - -#include "jni_convert_util.h" // NOLINT -#include "pd_inference_api.h" // NOLINT - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_cppConfigDestroy( - JNIEnv*, jobject, jlong cppPaddleConfigPointer) { - PD_ConfigDestroy(reinterpret_cast(cppPaddleConfigPointer)); -} - -// 1. create Config - -JNIEXPORT jlong JNICALL Java_com_baidu_paddle_inference_Config_createCppConfig( - JNIEnv* env, jobject obj) { - jlong cppPaddleConfigPointer = reinterpret_cast(PD_ConfigCreate()); - return cppPaddleConfigPointer; -} - -JNIEXPORT jboolean JNICALL -Java_com_baidu_paddle_inference_Config_isCppConfigValid( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - bool flag = - PD_ConfigIsValid(reinterpret_cast(cppPaddleConfigPointer)); - return cpp_bool_to_jboolean(env, flag); -} - -// 2. not combined model settings - -JNIEXPORT void JNICALL -Java_com_baidu_paddle_inference_Config_setCppModel(JNIEnv* env, - jobject obj, - jlong cppPaddleConfigPointer, - jstring modelFile, - jstring paramsFile) { - PD_ConfigSetModel(reinterpret_cast(cppPaddleConfigPointer), - jstring_to_cpp_string(env, modelFile).c_str(), - jstring_to_cpp_string(env, paramsFile).c_str()); -} - -// 3. 
combined model settings - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_setCppModelDir( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer, jstring modelDir) { - PD_ConfigSetModelDir(reinterpret_cast(cppPaddleConfigPointer), - jstring_to_cpp_string(env, modelDir).c_str()); -} - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_setCppProgFile( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer, jstring progFile) { - PD_ConfigSetProgFile(reinterpret_cast(cppPaddleConfigPointer), - jstring_to_cpp_string(env, progFile).c_str()); -} - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_setCppParamsFile( - JNIEnv* env, - jobject obj, - jlong cppPaddleConfigPointer, - jstring paramsFile) { - PD_ConfigSetParamsFile(reinterpret_cast(cppPaddleConfigPointer), - jstring_to_cpp_string(env, paramsFile).c_str()); -} - -JNIEXPORT jstring JNICALL Java_com_baidu_paddle_inference_Config_modelDir( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - return cpp_string_to_jstring( - env, - PD_ConfigGetModelDir( - reinterpret_cast(cppPaddleConfigPointer))); -} - -JNIEXPORT jstring JNICALL Java_com_baidu_paddle_inference_Config_progFile( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - return cpp_string_to_jstring( - env, - PD_ConfigGetProgFile( - reinterpret_cast(cppPaddleConfigPointer))); -} - -JNIEXPORT jstring JNICALL Java_com_baidu_paddle_inference_Config_paramsFile( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - return cpp_string_to_jstring( - env, - PD_ConfigGetParamsFile( - reinterpret_cast(cppPaddleConfigPointer))); -} - -// 4. cpu settings - -JNIEXPORT void JNICALL -Java_com_baidu_paddle_inference_Config_setCpuMathLibraryNumThreads( - JNIEnv* env, - jobject obj, - jlong cppPaddleConfigPointer, - jint mathThreadsNum) { - int math_threads_num = reinterpret_cast(mathThreadsNum); - PD_ConfigSetCpuMathLibraryNumThreads( - reinterpret_cast(cppPaddleConfigPointer), math_threads_num); -} - -JNIEXPORT jint JNICALL -Java_com_baidu_paddle_inference_Config_cpuMathLibraryNumThreads( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - jint mathThreadsNum = - reinterpret_cast(PD_ConfigGetCpuMathLibraryNumThreads( - reinterpret_cast(cppPaddleConfigPointer))); - return mathThreadsNum; -} - -// 5. MKLDNN settings - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_enableMKLDNN( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - PD_ConfigEnableMKLDNN(reinterpret_cast(cppPaddleConfigPointer)); -} - -JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_inference_Config_mkldnnEnabled( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - bool flag = PD_ConfigMkldnnEnabled( - reinterpret_cast(cppPaddleConfigPointer)); - return cpp_bool_to_jboolean(env, flag); -} - -JNIEXPORT void JNICALL -Java_com_baidu_paddle_inference_Config_enableMkldnnBfloat16( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - PD_ConfigEnableMkldnnBfloat16( - reinterpret_cast(cppPaddleConfigPointer)); -} - -JNIEXPORT jboolean JNICALL -Java_com_baidu_paddle_inference_Config_mkldnnBfloat16Enabled( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - bool flag = PD_ConfigMkldnnBfloat16Enabled( - reinterpret_cast(cppPaddleConfigPointer)); - return cpp_bool_to_jboolean(env, flag); -} - -// 6. 
gpu setting - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_enableUseGpu( - JNIEnv* env, - jobject obj, - jlong cppPaddleConfigPointer, - jlong memorySize, - jint deviceId) { - PD_ConfigEnableUseGpu(reinterpret_cast(cppPaddleConfigPointer), - (uint64_t)memorySize, - (int32_t)deviceId); -} - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_disableGpu( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - PD_ConfigDisableGpu(reinterpret_cast(cppPaddleConfigPointer)); -} - -JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_inference_Config_useGpu( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - bool flag = - PD_ConfigUseGpu(reinterpret_cast(cppPaddleConfigPointer)); - return cpp_bool_to_jboolean(env, flag); -} - -JNIEXPORT jint JNICALL Java_com_baidu_paddle_inference_Config_gpuDeviceId( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - int device_id = PD_ConfigGpuDeviceId( - reinterpret_cast(cppPaddleConfigPointer)); - return reinterpret_cast(device_id); -} - -JNIEXPORT jint JNICALL -Java_com_baidu_paddle_inference_Config_memoryPoolInitSizeMb( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - int memory_pool_init_size_mb = PD_ConfigMemoryPoolInitSizeMb( - reinterpret_cast(cppPaddleConfigPointer)); - return reinterpret_cast(memory_pool_init_size_mb); -} - -JNIEXPORT jfloat JNICALL -Java_com_baidu_paddle_inference_Config_fractionOfGpuMemoryForPool( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - float fraction_of_gpuMemory_for_pool = PD_ConfigFractionOfGpuMemoryForPool( - reinterpret_cast(cppPaddleConfigPointer)); - return (jfloat)fraction_of_gpuMemory_for_pool; -} - -// 7. TensorRT To Do - -// 8. optim setting - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_switchIrOptim( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer, jboolean flag) { - PD_ConfigSwitchIrOptim(reinterpret_cast(cppPaddleConfigPointer), - jboolean_to_cpp_bool(env, flag)); -} - -JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_inference_Config_irOptim( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - bool flag = - PD_ConfigIrOptim(reinterpret_cast(cppPaddleConfigPointer)); - return cpp_bool_to_jboolean(env, flag); -} - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_switchIrDebug( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer, jboolean flag) { - PD_ConfigSwitchIrDebug(reinterpret_cast(cppPaddleConfigPointer), - jboolean_to_cpp_bool(env, flag)); -} - -// 9. enable memory optimization - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_enableMemoryOptim( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer, jboolean flag) { - PD_ConfigEnableMemoryOptim( - reinterpret_cast(cppPaddleConfigPointer), - jboolean_to_cpp_bool(env, flag)); -} - -JNIEXPORT jboolean JNICALL -Java_com_baidu_paddle_inference_Config_memoryOptimEnabled( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - bool flag = PD_ConfigMemoryOptimEnabled( - reinterpret_cast(cppPaddleConfigPointer)); - return cpp_bool_to_jboolean(env, flag); -} - -// 10. 
profile setting - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_enableProfile( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - PD_ConfigEnableProfile(reinterpret_cast(cppPaddleConfigPointer)); -} - -JNIEXPORT jboolean JNICALL -Java_com_baidu_paddle_inference_Config_profileEnabled( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - bool flag = PD_ConfigProfileEnabled( - reinterpret_cast(cppPaddleConfigPointer)); - return cpp_bool_to_jboolean(env, flag); -} - -// 11. log setting - -JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_disableGlogInfo( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - PD_ConfigDisableGlogInfo( - reinterpret_cast(cppPaddleConfigPointer)); -} - -// 12. view config configuration - -JNIEXPORT jstring JNICALL Java_com_baidu_paddle_inference_Config_summary( - JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { - return cpp_string_to_jstring( - env, - PD_ConfigSummary(reinterpret_cast(cppPaddleConfigPointer))); -} +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "com_baidu_paddle_inference_Config.h" + +#include + +#include "jni_convert_util.h" // NOLINT +#include "pd_inference_api.h" // NOLINT + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_cppConfigDestroy( + JNIEnv*, jobject, jlong cppPaddleConfigPointer) { + PD_ConfigDestroy(reinterpret_cast(cppPaddleConfigPointer)); +} + +// 1. create Config + +JNIEXPORT jlong JNICALL Java_com_baidu_paddle_inference_Config_createCppConfig( + JNIEnv* env, jobject obj) { + jlong cppPaddleConfigPointer = reinterpret_cast(PD_ConfigCreate()); + return cppPaddleConfigPointer; +} + +JNIEXPORT jboolean JNICALL +Java_com_baidu_paddle_inference_Config_isCppConfigValid( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + bool flag = + PD_ConfigIsValid(reinterpret_cast(cppPaddleConfigPointer)); + return cpp_bool_to_jboolean(env, flag); +} + +// 2. not combined model settings + +JNIEXPORT void JNICALL +Java_com_baidu_paddle_inference_Config_setCppModel(JNIEnv* env, + jobject obj, + jlong cppPaddleConfigPointer, + jstring modelFile, + jstring paramsFile) { + PD_ConfigSetModel(reinterpret_cast(cppPaddleConfigPointer), + jstring_to_cpp_string(env, modelFile).c_str(), + jstring_to_cpp_string(env, paramsFile).c_str()); +} + +// 3. 
combined model settings + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_setCppModelDir( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer, jstring modelDir) { + PD_ConfigSetModelDir(reinterpret_cast(cppPaddleConfigPointer), + jstring_to_cpp_string(env, modelDir).c_str()); +} + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_setCppProgFile( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer, jstring progFile) { + PD_ConfigSetProgFile(reinterpret_cast(cppPaddleConfigPointer), + jstring_to_cpp_string(env, progFile).c_str()); +} + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_setCppParamsFile( + JNIEnv* env, + jobject obj, + jlong cppPaddleConfigPointer, + jstring paramsFile) { + PD_ConfigSetParamsFile(reinterpret_cast(cppPaddleConfigPointer), + jstring_to_cpp_string(env, paramsFile).c_str()); +} + +JNIEXPORT jstring JNICALL Java_com_baidu_paddle_inference_Config_modelDir( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + return cpp_string_to_jstring( + env, + PD_ConfigGetModelDir( + reinterpret_cast(cppPaddleConfigPointer))); +} + +JNIEXPORT jstring JNICALL Java_com_baidu_paddle_inference_Config_progFile( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + return cpp_string_to_jstring( + env, + PD_ConfigGetProgFile( + reinterpret_cast(cppPaddleConfigPointer))); +} + +JNIEXPORT jstring JNICALL Java_com_baidu_paddle_inference_Config_paramsFile( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + return cpp_string_to_jstring( + env, + PD_ConfigGetParamsFile( + reinterpret_cast(cppPaddleConfigPointer))); +} + +// 4. cpu settings + +JNIEXPORT void JNICALL +Java_com_baidu_paddle_inference_Config_setCpuMathLibraryNumThreads( + JNIEnv* env, + jobject obj, + jlong cppPaddleConfigPointer, + jint mathThreadsNum) { + int math_threads_num = reinterpret_cast(mathThreadsNum); + PD_ConfigSetCpuMathLibraryNumThreads( + reinterpret_cast(cppPaddleConfigPointer), math_threads_num); +} + +JNIEXPORT jint JNICALL +Java_com_baidu_paddle_inference_Config_cpuMathLibraryNumThreads( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + jint mathThreadsNum = + reinterpret_cast(PD_ConfigGetCpuMathLibraryNumThreads( + reinterpret_cast(cppPaddleConfigPointer))); + return mathThreadsNum; +} + +// 5. MKLDNN settings + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_enableMKLDNN( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + PD_ConfigEnableMKLDNN(reinterpret_cast(cppPaddleConfigPointer)); +} + +JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_inference_Config_mkldnnEnabled( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + bool flag = PD_ConfigMkldnnEnabled( + reinterpret_cast(cppPaddleConfigPointer)); + return cpp_bool_to_jboolean(env, flag); +} + +JNIEXPORT void JNICALL +Java_com_baidu_paddle_inference_Config_enableMkldnnBfloat16( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + PD_ConfigEnableMkldnnBfloat16( + reinterpret_cast(cppPaddleConfigPointer)); +} + +JNIEXPORT jboolean JNICALL +Java_com_baidu_paddle_inference_Config_mkldnnBfloat16Enabled( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + bool flag = PD_ConfigMkldnnBfloat16Enabled( + reinterpret_cast(cppPaddleConfigPointer)); + return cpp_bool_to_jboolean(env, flag); +} + +// 6. 
gpu setting + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_enableUseGpu( + JNIEnv* env, + jobject obj, + jlong cppPaddleConfigPointer, + jlong memorySize, + jint deviceId) { + PD_ConfigEnableUseGpu(reinterpret_cast(cppPaddleConfigPointer), + (uint64_t)memorySize, + (int32_t)deviceId, + 0); +} + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_disableGpu( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + PD_ConfigDisableGpu(reinterpret_cast(cppPaddleConfigPointer)); +} + +JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_inference_Config_useGpu( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + bool flag = + PD_ConfigUseGpu(reinterpret_cast(cppPaddleConfigPointer)); + return cpp_bool_to_jboolean(env, flag); +} + +JNIEXPORT jint JNICALL Java_com_baidu_paddle_inference_Config_gpuDeviceId( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + int device_id = PD_ConfigGpuDeviceId( + reinterpret_cast(cppPaddleConfigPointer)); + return reinterpret_cast(device_id); +} + +JNIEXPORT jint JNICALL +Java_com_baidu_paddle_inference_Config_memoryPoolInitSizeMb( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + int memory_pool_init_size_mb = PD_ConfigMemoryPoolInitSizeMb( + reinterpret_cast(cppPaddleConfigPointer)); + return reinterpret_cast(memory_pool_init_size_mb); +} + +JNIEXPORT jfloat JNICALL +Java_com_baidu_paddle_inference_Config_fractionOfGpuMemoryForPool( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + float fraction_of_gpuMemory_for_pool = PD_ConfigFractionOfGpuMemoryForPool( + reinterpret_cast(cppPaddleConfigPointer)); + return (jfloat)fraction_of_gpuMemory_for_pool; +} + +// 7. TensorRT To Do + +// 8. optim setting + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_switchIrOptim( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer, jboolean flag) { + PD_ConfigSwitchIrOptim(reinterpret_cast(cppPaddleConfigPointer), + jboolean_to_cpp_bool(env, flag)); +} + +JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_inference_Config_irOptim( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + bool flag = + PD_ConfigIrOptim(reinterpret_cast(cppPaddleConfigPointer)); + return cpp_bool_to_jboolean(env, flag); +} + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_switchIrDebug( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer, jboolean flag) { + PD_ConfigSwitchIrDebug(reinterpret_cast(cppPaddleConfigPointer), + jboolean_to_cpp_bool(env, flag)); +} + +// 9. enable memory optimization + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_enableMemoryOptim( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer, jboolean flag) { + PD_ConfigEnableMemoryOptim( + reinterpret_cast(cppPaddleConfigPointer), + jboolean_to_cpp_bool(env, flag)); +} + +JNIEXPORT jboolean JNICALL +Java_com_baidu_paddle_inference_Config_memoryOptimEnabled( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + bool flag = PD_ConfigMemoryOptimEnabled( + reinterpret_cast(cppPaddleConfigPointer)); + return cpp_bool_to_jboolean(env, flag); +} + +// 10. 
profile setting + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_enableProfile( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + PD_ConfigEnableProfile(reinterpret_cast(cppPaddleConfigPointer)); +} + +JNIEXPORT jboolean JNICALL +Java_com_baidu_paddle_inference_Config_profileEnabled( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + bool flag = PD_ConfigProfileEnabled( + reinterpret_cast(cppPaddleConfigPointer)); + return cpp_bool_to_jboolean(env, flag); +} + +// 11. log setting + +JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_disableGlogInfo( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + PD_ConfigDisableGlogInfo( + reinterpret_cast(cppPaddleConfigPointer)); +} + +// 12. view config configuration + +JNIEXPORT jstring JNICALL Java_com_baidu_paddle_inference_Config_summary( + JNIEnv* env, jobject obj, jlong cppPaddleConfigPointer) { + return cpp_string_to_jstring( + env, + PD_ConfigSummary(reinterpret_cast(cppPaddleConfigPointer))); +} diff --git a/paddle/fluid/inference/goapi/config.go b/paddle/fluid/inference/goapi/config.go index 508ac635295605c3c4b3b3f0c1f0437b90b3d22f..72c5ab078c83d596e6e535ddb4424c984a73aea6 100644 --- a/paddle/fluid/inference/goapi/config.go +++ b/paddle/fluid/inference/goapi/config.go @@ -157,7 +157,7 @@ func (config *Config) UseFcPadding() bool { /// \param deviceId the GPU card to use. /// func (config *Config) EnableUseGpu(memorySize uint64, deviceId int32) { - C.PD_ConfigEnableUseGpu(config.c, C.uint64_t(memorySize), C.int32_t(deviceId)) + C.PD_ConfigEnableUseGpu(config.c, C.uint64_t(memorySize), C.int32_t(deviceId), 0) } /// diff --git a/paddle/fluid/inference/tensorrt/convert/cast_op.cc b/paddle/fluid/inference/tensorrt/convert/cast_op.cc index db986c11f0be7a3a92d59066b211c2ecc6854efd..3a5f5e9e1937fdddcb2f11ce55c9a5cfc29d375e 100644 --- a/paddle/fluid/inference/tensorrt/convert/cast_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/cast_op.cc @@ -46,6 +46,7 @@ class CastOpConverter : public OpConverter { layer->setOutputType(0, nvinfer1::DataType::kBOOL); break; case 2: // INT32 = 2 + case 3: // INT64 = 3 there is no int64 in tensorrt subgraph layer->setOutputType(0, nvinfer1::DataType::kINT32); break; case 4: // FP16 = 4 diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_exp_gpu_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_exp_gpu_tester.cc index e0b580fe70abfb1846b0201565123d0cc28865e6..7faf06567dd577c7773b3313c5d5d28c1f2b0170 100644 --- a/paddle/fluid/inference/tests/api/analyzer_capi_exp_gpu_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_capi_exp_gpu_tester.cc @@ -19,6 +19,10 @@ limitations under the License. 
*/ #include #include +#if defined(PADDLE_WITH_CUDA) +#include +#endif + #include "paddle/fluid/inference/capi_exp/pd_inference_api.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" @@ -37,7 +41,7 @@ TEST(PD_Config, gpu_interface) { PD_ConfigSetModel(config, prog_file.c_str(), param_file.c_str()); PD_ConfigSetOptimCacheDir(config, opt_cache_dir.c_str()); - PD_ConfigEnableUseGpu(config, 100, 0); + PD_ConfigEnableUseGpu(config, 100, 0, 0); bool use_gpu = PD_ConfigUseGpu(config); EXPECT_TRUE(use_gpu); int init_size = PD_ConfigMemoryPoolInitSizeMb(config); @@ -84,6 +88,14 @@ TEST(PD_Config, gpu_interface) { bool thread_local_thread = PD_ConfigThreadLocalStreamEnabled(config); EXPECT_TRUE(thread_local_thread); +#if defined(PADDLE_WITH_CUDA) + { + cudaStream_t external_stream; + cudaStreamCreate(&external_stream); + PD_ConfigSetExecStream(config, external_stream); + } +#endif + PD_ConfigDisableGpu(config); PD_ConfigDestroy(config); } @@ -104,7 +116,7 @@ TEST(PD_Config, use_gpu) { const char* model_dir_ = PD_ConfigGetModelDir(config); LOG(INFO) << model_dir_; - PD_ConfigEnableUseGpu(config, 100, 0); + PD_ConfigEnableUseGpu(config, 100, 0, 0); bool use_gpu = PD_ConfigUseGpu(config); EXPECT_TRUE(use_gpu); int device_id = PD_ConfigGpuDeviceId(config); @@ -142,7 +154,7 @@ TEST(PD_Config, use_gpu) { TEST(PD_Config, trt_int8) { std::string model_dir = FLAGS_infer_model + "/mobilenet"; PD_Config* config = PD_ConfigCreate(); - PD_ConfigEnableUseGpu(config, 100, 0); + PD_ConfigEnableUseGpu(config, 100, 0, 0); PD_ConfigEnableTensorRtEngine( config, 1 << 20, 1, 3, PD_PRECISION_INT8, FALSE, TRUE); bool trt_enable = PD_ConfigTensorRtEngineEnabled(config); @@ -153,7 +165,7 @@ TEST(PD_Config, trt_int8) { TEST(PD_Config, trt_fp16) { std::string model_dir = FLAGS_infer_model + "/mobilenet"; PD_Config* config = PD_ConfigCreate(); - PD_ConfigEnableUseGpu(config, 100, 0); + PD_ConfigEnableUseGpu(config, 100, 0, 0); PD_ConfigEnableTensorRtEngine( config, 1 << 20, 1, 3, PD_PRECISION_HALF, FALSE, FALSE); bool trt_enable = PD_ConfigTensorRtEngineEnabled(config); diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_exp_int_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_exp_int_tester.cc index dfcf5fda4763eb0ff4457552465e5946597e20a7..b83e5d68cca91bec3eedf070396701a854d032b7 100644 --- a/paddle/fluid/inference/tests/api/analyzer_capi_exp_int_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_capi_exp_int_tester.cc @@ -37,6 +37,9 @@ void predictor_run() { PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor); LOG(INFO) << "The inputs' size is: " << input_names->size; EXPECT_EQ(input_names->size, 2u); + PD_IOInfos* in_infos = PD_PredictorGetInputInfos(predictor); + EXPECT_EQ(in_infos->size, 2u); + PD_IOInfos* out_infos = PD_PredictorGetOutputInfos(predictor); int32_t shape_0[4] = {1, 3, 224, 224}; float data_0[1 * 3 * 224 * 224] = {0}; @@ -79,6 +82,8 @@ void predictor_run() { PD_TensorDestroy(input_1); PD_TensorDestroy(input_0); PD_OneDimArrayCstrDestroy(input_names); + PD_IOInfosDestroy(in_infos); + PD_IOInfosDestroy(out_infos); PD_PredictorDestroy(predictor); } diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_config_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_config_tester.cc index 804ac21ea406c8b0755a875ce7be012394d7acf9..6bce6b0d9df48712c0b33b51b4c1e50295c6df83 100644 --- a/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_config_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_config_tester.cc @@ 
-85,6 +85,10 @@ TEST(PD_Config, interface) { PD_ConfigEnableMkldnnBfloat16(config); PD_ConfigSetBfloat16Op(config, 1, &ops_name); + + PD_ConfigEnableMkldnnInt8(config); + bool mkldnn_int8_enabled = PD_ConfigMkldnnInt8Enabled(config); + EXPECT_TRUE(mkldnn_int8_enabled); #endif PD_ConfigEnableONNXRuntime(config); diff --git a/paddle/fluid/memory/allocation/mmap_allocator.cc b/paddle/fluid/memory/allocation/mmap_allocator.cc index 55529b58aeb5f1cb4efe6aedd9c582b5f174e4c3..c0747a12984f41cbfc2a2e5a540b7cf22f7580fb 100644 --- a/paddle/fluid/memory/allocation/mmap_allocator.cc +++ b/paddle/fluid/memory/allocation/mmap_allocator.cc @@ -198,8 +198,7 @@ void RefcountedMemoryMapAllocation::close() { MemoryMapAllocationPool::Instance().Insert(MemoryMapInfo( flags_, map_size_ - mmap_alignment, ipc_name_, map_ptr_)); } else { - if (info->refcount == 0 && - shm_open(ipc_name_.c_str(), O_RDWR, (mode_t)0600) != -1) { + if (info->refcount == 0) { shm_unlink(ipc_name_.c_str()); VLOG(6) << "shm_unlink file: " << ipc_name_; } diff --git a/paddle/fluid/operators/controlflow/feed_op.cc b/paddle/fluid/operators/controlflow/feed_op.cc index 194dccb0e6ea03ebbf16af5ff02d93bc883589cd..09684b8d737bae2c2677e026f534581ec7ba881f 100644 --- a/paddle/fluid/operators/controlflow/feed_op.cc +++ b/paddle/fluid/operators/controlflow/feed_op.cc @@ -305,24 +305,15 @@ PD_REGISTER_GENERAL_KERNEL( ALL_LAYOUT, paddle::operators::FeedStringsKernel, ALL_DTYPE) {} - -#elif defined(PADDLE_WITH_CUSTOM_DEVICE) -PD_REGISTER_GENERAL_KERNEL( - feed_dense_tensor, - custom_cpu, - ALL_LAYOUT, - paddle::operators::FeedDenseTensorKernel, - ALL_DTYPE) {} -PD_REGISTER_GENERAL_KERNEL( - feed_sparse_coo_tensor, - custom_cpu, - ALL_LAYOUT, - paddle::operators::FeedSparseCooTensorKernel, - ALL_DTYPE) {} -PD_REGISTER_GENERAL_KERNEL( - feed_strings, - custom_cpu, - ALL_LAYOUT, - paddle::operators::FeedStringsKernel, - ALL_DTYPE) {} +#endif +#ifdef PADDLE_WITH_CUSTOM_DEVICE +namespace paddle { +namespace operators { +template void FeedDenseTensorKernel( + const phi::CustomContext& dev_ctx, + const phi::ExtendedTensor& x, + int col, + phi::DenseTensor* out); +} // namespace operators +} // namespace paddle #endif diff --git a/paddle/fluid/operators/custom_device_common_op_registry.cc b/paddle/fluid/operators/custom_device_common_op_registry.cc index 69625c03dbabec5c8b266ed9d6fb832d8e0236e9..bbb75d41833231b313c198bca7dde61aaa9d1508 100644 --- a/paddle/fluid/operators/custom_device_common_op_registry.cc +++ b/paddle/fluid/operators/custom_device_common_op_registry.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include "paddle/fluid/operators/run_program_op.h" #include "paddle/fluid/operators/save_combine_op.h" #include "paddle/phi/backends/device_manager.h" +#include "paddle/phi/core/kernel_registry.h" #define REGISTER_OP_CUSTOM_DEVICE_KERNEL(op_type, dev_type, ...) \ static paddle::framework::OpKernelRegistrar \ @@ -26,10 +27,30 @@ limitations under the License. 
*/ paddle::framework::OpKernelType::kDefaultCustomizedTypeValue); \ __op_custom_device_kernel_registrar_##op_type##_##__acosf##__.Touch(); +#define REGISTER_CUSTOM_DEVICE_GENERAL_KERNEL( \ + kernel_name, dev_type, layout, kernel_fn) \ + static phi::KernelRegistrar \ + __reg_custom_device_phi_kernel_##kernel_name##_##backend##_##layout( \ + phi::RegType::INNER, \ + #kernel_name, \ + dev_type, \ + DATALAYOUT(layout), \ + ::phi::KernelArgsParseFunctor::Parse, \ + [](const phi::KernelKey& kernel_key, phi::Kernel* kernel) {}, \ + PHI_KERNEL(kernel_fn), \ + PHI_VARIADIC_KERNEL(kernel_fn)) + namespace paddle { namespace operators { +template +void FeedDenseTensorKernel(const Context& dev_ctx, + const phi::ExtendedTensor& x, + int col, + phi::DenseTensor* out); + void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE auto device_type = dev_type.c_str(); /* see [Why use single type kernel] */ REGISTER_OP_CUSTOM_DEVICE_KERNEL( @@ -66,9 +87,16 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) { LoadCombineOpKernel, paddle::operators:: LoadCombineOpKernel); + REGISTER_CUSTOM_DEVICE_GENERAL_KERNEL( + feed_dense_tensor, + device_type, + ALL_LAYOUT, + paddle::operators::FeedDenseTensorKernel); +#endif } } // namespace operators } // namespace paddle #undef REGISTER_OP_CUSTOM_DEVICE_KERNEL +#undef REGISTER_CUSTOM_DEVICE_GENERAL_KERNEL diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc index 12c82250157282dd2bb58307318b4ca4f1c3ee68..ac1d1690cce55e9c59bdb38c4b9d5a441406d0b0 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc @@ -56,6 +56,7 @@ class SequencePadOp : public framework::OperatorWithKernel { auto pad_value_dims = ctx->GetInputDim("PadValue"); PADDLE_ENFORCE_EQ( pad_value_dims == phi::make_ddim({1}) || + pad_value_dims == phi::make_ddim({}) || pad_value_dims == time_step_dims, true, platform::errors::InvalidArgument( diff --git a/paddle/fluid/prim/api/generated/prim_api/static_prim_api.cc b/paddle/fluid/prim/api/generated/prim_api/static_prim_api.cc index fd309750ed6014048421d370501bad0a1fe71eff..30a82b4989972b4a0dd6f24b077b0c662306115e 100644 --- a/paddle/fluid/prim/api/generated/prim_api/static_prim_api.cc +++ b/paddle/fluid/prim/api/generated/prim_api/static_prim_api.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include #include #include @@ -166,7 +167,16 @@ Tensor full(const IntArray& shape, phi::errors::InvalidArgument( "We only support float32/float16 for full, but we got data type: %s", phi::DataTypeToString(dtype))); - op->SetAttr("value", value.to()); + if (dtype == phi::DataType::FLOAT32) { + op->SetAttr("value", value.to()); + } else if (dtype == phi::DataType::FLOAT64) { + op->SetAttr("str_value", std::to_string(value.to())); + } else if (dtype == phi::DataType::FLOAT16) { + op->SetAttr("str_value", std::to_string(value.to())); + } else { + PADDLE_THROW(phi::errors::Unimplemented( + "We only support float64/float32/float16 for full")); + } op->SetAttr("dtype", paddle::framework::TransToProtoVarType(dtype)); op->SetOutput( "Out", {std::static_pointer_cast(out.impl())->Name()}); diff --git a/paddle/fluid/prim/api/manual/backward/composite_backward_api.h b/paddle/fluid/prim/api/manual/backward/composite_backward_api.h index c148fca37bf00eb0c7411c5f880cb9391ddda0f5..99ef82d08881c28f13cc88a18371c23d447c88d8 100644 --- a/paddle/fluid/prim/api/manual/backward/composite_backward_api.h +++ b/paddle/fluid/prim/api/manual/backward/composite_backward_api.h @@ -32,7 +32,7 @@ void tanh_grad(const Tensor& out, const Tensor& grad_out, Tensor* grad_x) { auto tmp = pow(out, 2.0); tmp = scale(tmp, -1.0, 1.0, true); auto grad_x_tmp = multiply(grad_out, tmp); - set_output(grad_x_tmp.impl(), grad_x); + set_output(grad_x_tmp, grad_x); } template @@ -53,7 +53,7 @@ void subtract_grad(const Tensor& x, auto dy_reduce_res = sum( scale_out_grad, phi::vectorize(reduce_dim), y.dtype(), false); auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); - set_output(dy_tmp.impl(), dy); + set_output(dy_tmp, dy); } } else { by_pass(scale_out_grad, dy); @@ -69,7 +69,7 @@ void subtract_grad(const Tensor& x, auto dx_reduce_res = sum(out_grad, phi::vectorize(reduce_dim), x.dtype(), false); auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); - set_output(dx_tmp.impl(), dx); + set_output(dx_tmp, dx); } } else { by_pass(out_grad, dx); @@ -94,7 +94,7 @@ void add_grad(const Tensor& x, auto dy_reduce_res = sum(out_grad, phi::vectorize(reduce_dim), y.dtype(), false); auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); - set_output(dy_tmp.impl(), dy); + set_output(dy_tmp, dy); } } else { @@ -111,7 +111,7 @@ void add_grad(const Tensor& x, auto dx_reduce_res = sum(out_grad, phi::vectorize(reduce_dim), x.dtype(), false); auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); - set_output(dx_tmp.impl(), dx); + set_output(dx_tmp, dx); } } else { by_pass(out_grad, dx); @@ -139,22 +139,26 @@ void sum_grad(const Tensor& x, reduce_all = false; } auto x_grad_tmp = Tensor(); - if (!keepdim) { - auto axis_ = std::vector(); - if (reduce_all) { - for (int64_t i = 1; i < x_dim_size; i++) { - axis_.push_back(i); + if (x_dim_size == 1) { + x_grad_tmp = expand(out_grad, IntArray(x_dim)); + } else { + if (!keepdim) { + auto axis_ = std::vector(); + if (reduce_all) { + for (int64_t i = 1; i < x_dim_size; i++) { + axis_.push_back(i); + } + } else { + axis_ = axis.GetData(); } + auto out_grad_ = unsqueeze(out_grad, axis_); + x_grad_tmp = expand(out_grad_, IntArray(x_dim)); } else { - axis_ = axis.GetData(); + x_grad_tmp = expand(out_grad, IntArray(x_dim)); } - auto out_grad_ = unsqueeze(out_grad, axis_); - x_grad_tmp = expand(out_grad_, IntArray(x_dim)); - } else { - x_grad_tmp = expand(out_grad, IntArray(x_dim)); } - set_output(x_grad_tmp.impl(), x_grad); + set_output(x_grad_tmp, x_grad); } template @@ 
-175,36 +179,36 @@ void divide_grad(const Tensor& x, // Maybe need reduce here phi::DDim reduce_dim = get_reduce_dims(y.dims(), x.dims()); if (!reduce_dim.size()) { - set_output(dy_res.impl(), dy); + set_output(dy_res, dy); } else { auto dy_reduce_res = sum(dy_res, phi::vectorize(reduce_dim), y.dtype(), false); auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); - set_output(dy_tmp.impl(), dy); + set_output(dy_tmp, dy); } } else { - set_output(dy_res.impl(), dy); + set_output(dy_res, dy); } } // indicate we will compute dy if (dx) { // dx = (1/y) * dout - auto one_tensor = full(phi::vectorize(y.dims()), 1.0); + auto one_tensor = full(phi::vectorize(y.dims()), 1.0, y.dtype()); auto tmp0 = divide(one_tensor, y); auto dx_res = multiply(tmp0, out_grad); if (y.dims() != x.dims()) { // Maybe need reduce here auto reduce_dim = get_reduce_dims(x.dims(), y.dims()); if (!reduce_dim.size()) { - set_output(dx_res.impl(), dx); + set_output(dx_res, dx); } else { auto dx_reduce_res = sum(dx_res, phi::vectorize(reduce_dim), x.dtype(), false); auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); - set_output(dx_tmp.impl(), dx); + set_output(dx_tmp, dx); } } else { - set_output(dx_res.impl(), dx); + set_output(dx_res, dx); } } // indicate we will compute dx } @@ -215,7 +219,7 @@ void sqrt_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) { auto div_x = full(phi::vectorize(out.dims()), 0.5); auto tmp = divide(div_x, out); auto x_grad_tmp = multiply(out_grad, tmp); - set_output(x_grad_tmp.impl(), x_grad); + set_output(x_grad_tmp, x_grad); } } @@ -231,7 +235,7 @@ void multiply_grad(const Tensor& x, if (x.dims() != y.dims()) { auto axes = get_reduce_dims(x.dims(), y.dims()); if (!axes.size()) { - set_output(x_grad_unreduce.impl(), x_grad); + set_output(x_grad_unreduce, x_grad); } else { auto x_grad_reduced = sum(x_grad_unreduce, phi::vectorize(axes), @@ -240,10 +244,10 @@ void multiply_grad(const Tensor& x, if (x_grad_reduced.dims().size() != x.dims().size()) { x_grad_reduced = reshape(x_grad_reduced, x.shape()); } - set_output(x_grad_reduced.impl(), x_grad); + set_output(x_grad_reduced, x_grad); } } else { - set_output(x_grad_unreduce.impl(), x_grad); + set_output(x_grad_unreduce, x_grad); } } if (y_grad) { @@ -251,7 +255,7 @@ void multiply_grad(const Tensor& x, if (y.dims() != x.dims()) { auto axes = get_reduce_dims(y.dims(), x.dims()); if (!axes.size()) { - set_output(y_grad_unreduce.impl(), y_grad); + set_output(y_grad_unreduce, y_grad); } else { auto y_grad_reduced = sum(y_grad_unreduce, phi::vectorize(axes), @@ -260,10 +264,10 @@ void multiply_grad(const Tensor& x, if (y_grad_reduced.dims().size() != y.dims().size()) { y_grad_reduced = reshape(y_grad_reduced, y.shape()); } - set_output(y_grad_reduced.impl(), y_grad); + set_output(y_grad_reduced, y_grad); } } else { - set_output(y_grad_unreduce.impl(), y_grad); + set_output(y_grad_unreduce, y_grad); } } } @@ -284,7 +288,7 @@ void expand_grad(const Tensor& x, if (reduced.dims().size() != x.dims().size()) { reduced = reshape(reduced, x.shape()); } - set_output(reduced.impl(), x_grad); + set_output(reduced, x_grad); } } else { by_pass(out_grad, x_grad); @@ -295,7 +299,7 @@ void expand_grad(const Tensor& x, template void exp_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { - set_output(multiply(out_grad, out).impl(), x_grad); + set_output(multiply(out_grad, out), x_grad); } } diff --git a/paddle/fluid/prim/api/manual/utils/eager_utils.cc b/paddle/fluid/prim/api/manual/utils/eager_utils.cc 
index dbf9615058d346084feef6af139ebfd5326f3df0..353945557f1d02386645a79c6b2d871fe90fb588 100644 --- a/paddle/fluid/prim/api/manual/utils/eager_utils.cc +++ b/paddle/fluid/prim/api/manual/utils/eager_utils.cc @@ -49,7 +49,6 @@ void set_output(const paddle::experimental::Tensor& x_tmp, template <> void by_pass(const paddle::experimental::Tensor& x, Tensor* out) { set_output(x, out); - // out->set_impl(x.impl()); } } // namespace prim diff --git a/paddle/fluid/prim/api/manual/utils/static_utils.cc b/paddle/fluid/prim/api/manual/utils/static_utils.cc index 4def77831b684921f1b012c722003a6ce74417fe..74656cfe7d48d17fe0c3fc2122896ef10f8535b7 100644 --- a/paddle/fluid/prim/api/manual/utils/static_utils.cc +++ b/paddle/fluid/prim/api/manual/utils/static_utils.cc @@ -69,7 +69,6 @@ void by_pass(const paddle::experimental::Tensor& x, op->InferVarType(block); op->InferShape(*block); set_output(new_out, out); - // out->set_impl(new_out.impl()); } } // namespace prim diff --git a/paddle/fluid/prim/tests/test_eager_prim.cc b/paddle/fluid/prim/tests/test_eager_prim.cc index 7bb9a389828f28d5cfe691a649057a893ebbc133..35902797ea24517d834715711a670f6ece4b899d 100644 --- a/paddle/fluid/prim/tests/test_eager_prim.cc +++ b/paddle/fluid/prim/tests/test_eager_prim.cc @@ -68,16 +68,16 @@ TEST(EagerPrim, TanhBackwardTest) { paddle::experimental::Tensor out0 = tanh_ad_func(tensor0); std::vector outs0 = {out0}; // Disable prim - PrimCommonUtils::SetPrimEnabled(false); - ASSERT_FALSE(PrimCommonUtils::IsPrimEnabled()); + PrimCommonUtils::SetBwdPrimEnabled(false); + ASSERT_FALSE(PrimCommonUtils::IsBwdPrimEnabled()); // 4. Run Backward egr::Backward(outs0, {}, false); paddle::experimental::Tensor out1 = tanh_ad_func(tensor1); std::vector outs1 = {out1}; // Disable prim - PrimCommonUtils::SetPrimEnabled(true); - ASSERT_TRUE(PrimCommonUtils::IsPrimEnabled()); + PrimCommonUtils::SetBwdPrimEnabled(true); + ASSERT_TRUE(PrimCommonUtils::IsBwdPrimEnabled()); // 4. 
Run Backward ::egr::Backward(outs1, {}, false); VLOG(7) @@ -99,10 +99,10 @@ TEST(EagerPrim, TanhBackwardTest) { } TEST(EagerPrim, TestFlags) { - PrimCommonUtils::SetPrimEnabled(true); - ASSERT_TRUE(PrimCommonUtils::IsPrimEnabled()); - PrimCommonUtils::SetPrimEnabled(false); - ASSERT_FALSE(PrimCommonUtils::IsPrimEnabled()); + PrimCommonUtils::SetBwdPrimEnabled(true); + ASSERT_TRUE(PrimCommonUtils::IsBwdPrimEnabled()); + PrimCommonUtils::SetBwdPrimEnabled(false); + ASSERT_FALSE(PrimCommonUtils::IsBwdPrimEnabled()); } } // namespace prim diff --git a/paddle/fluid/prim/tests/test_static_prim.cc b/paddle/fluid/prim/tests/test_static_prim.cc index 87475559617fb6b70c89417769c62695891ea443..fe7a6ca4040448306f79978e527dafa10c9a9a27 100644 --- a/paddle/fluid/prim/tests/test_static_prim.cc +++ b/paddle/fluid/prim/tests/test_static_prim.cc @@ -341,10 +341,10 @@ TEST(StaticCompositeGradMaker, TestMutiOutputMethod) { } TEST(StaticPrim, TestFlags) { - PrimCommonUtils::SetPrimEnabled(true); - ASSERT_TRUE(PrimCommonUtils::IsPrimEnabled()); - PrimCommonUtils::SetPrimEnabled(false); - ASSERT_FALSE(PrimCommonUtils::IsPrimEnabled()); + PrimCommonUtils::SetBwdPrimEnabled(true); + ASSERT_TRUE(PrimCommonUtils::IsBwdPrimEnabled()); + PrimCommonUtils::SetBwdPrimEnabled(false); + ASSERT_FALSE(PrimCommonUtils::IsBwdPrimEnabled()); } } // namespace prim diff --git a/paddle/fluid/prim/utils/static/static_global_utils.cc b/paddle/fluid/prim/utils/static/static_global_utils.cc index 3e3a0f56977e3c78e0e5de72b53654b540907eee..9631994ab2bce79e24023fe95c77934fedd2acda 100644 --- a/paddle/fluid/prim/utils/static/static_global_utils.cc +++ b/paddle/fluid/prim/utils/static/static_global_utils.cc @@ -18,6 +18,7 @@ namespace paddle { namespace prim { StaticCompositeContext* StaticCompositeContext::static_composite_context_ = new StaticCompositeContext(); -thread_local bool StaticCompositeContext::enable_prim_ = false; +thread_local bool StaticCompositeContext::enable_bwd_prim_ = false; +thread_local bool StaticCompositeContext::enable_fwd_prim_ = false; } // namespace prim } // namespace paddle diff --git a/paddle/fluid/prim/utils/static/static_global_utils.h b/paddle/fluid/prim/utils/static/static_global_utils.h index f70659c278aeca0ba81d5096aa27163d972b5003..08407013673621a364c177aa1c453e8904fcac63 100644 --- a/paddle/fluid/prim/utils/static/static_global_utils.h +++ b/paddle/fluid/prim/utils/static/static_global_utils.h @@ -56,9 +56,18 @@ class StaticCompositeContext { return generator_->Generate(key); } - void SetPrimEnabled(bool enable_prim) { enable_prim_ = enable_prim; } + void SetBwdPrimEnabled(bool enable_prim) { enable_bwd_prim_ = enable_prim; } - bool IsPrimEnabled() { return enable_prim_; } + bool IsBwdPrimEnabled() { return enable_bwd_prim_; } + + void SetFwdPrimEnabled(bool enable_prim) { enable_fwd_prim_ = enable_prim; } + + bool IsFwdPrimEnabled() { return enable_fwd_prim_; } + + void SetAllPrimEnabled(bool enable_prim) { + enable_fwd_prim_ = enable_prim; + enable_bwd_prim_ = enable_prim; + } private: StaticCompositeContext() @@ -66,7 +75,8 @@ class StaticCompositeContext { framework::BlockDesc* current_block_desc_; std::unique_ptr generator_; - static thread_local bool enable_prim_; + static thread_local bool enable_bwd_prim_; + static thread_local bool enable_fwd_prim_; static StaticCompositeContext* static_composite_context_; DISABLE_COPY_AND_ASSIGN(StaticCompositeContext); }; diff --git a/paddle/fluid/prim/utils/utils.cc b/paddle/fluid/prim/utils/utils.cc index 
ddb97ab640d20b84e0c9ab143ead2129b45c884d..fb415262c8d13e2e0ca297f98eda8288c5ceb53c 100644 --- a/paddle/fluid/prim/utils/utils.cc +++ b/paddle/fluid/prim/utils/utils.cc @@ -19,12 +19,24 @@ PADDLE_DEFINE_EXPORTED_bool(prim_enabled, false, "enable_prim or not"); namespace paddle { namespace prim { -bool PrimCommonUtils::IsPrimEnabled() { - return StaticCompositeContext::Instance().IsPrimEnabled(); +bool PrimCommonUtils::IsBwdPrimEnabled() { + return StaticCompositeContext::Instance().IsBwdPrimEnabled(); } -void PrimCommonUtils::SetPrimEnabled(bool enable_prim) { - return StaticCompositeContext::Instance().SetPrimEnabled(enable_prim); +void PrimCommonUtils::SetBwdPrimEnabled(bool enable_prim) { + return StaticCompositeContext::Instance().SetBwdPrimEnabled(enable_prim); +} + +bool PrimCommonUtils::IsFwdPrimEnabled() { + return StaticCompositeContext::Instance().IsFwdPrimEnabled(); +} + +void PrimCommonUtils::SetFwdPrimEnabled(bool enable_prim) { + return StaticCompositeContext::Instance().SetFwdPrimEnabled(enable_prim); +} + +void PrimCommonUtils::SetAllPrimEnabled(bool enable_prim) { + return StaticCompositeContext::Instance().SetAllPrimEnabled(enable_prim); } } // namespace prim } // namespace paddle diff --git a/paddle/fluid/prim/utils/utils.h b/paddle/fluid/prim/utils/utils.h index 14757c4eecde6aaf693b0b6fd2f476b56efd0e04..38973dc87b8adf9408e0fc62dd85d11cad754551 100644 --- a/paddle/fluid/prim/utils/utils.h +++ b/paddle/fluid/prim/utils/utils.h @@ -18,8 +18,11 @@ namespace paddle { namespace prim { class PrimCommonUtils { public: - static bool IsPrimEnabled(); - static void SetPrimEnabled(bool enabled); + static bool IsBwdPrimEnabled(); + static void SetBwdPrimEnabled(bool enabled); + static bool IsFwdPrimEnabled(); + static void SetFwdPrimEnabled(bool enabled); + static void SetAllPrimEnabled(bool enabled); }; } // namespace prim } // namespace paddle diff --git a/paddle/fluid/pybind/bind_fleet_executor.cc b/paddle/fluid/pybind/bind_fleet_executor.cc index b4a6432e9e58b2ae8993651e8767731516302b0c..8fb3dc4cda6c956adae60bb7bdb4e7c721998426 100644 --- a/paddle/fluid/pybind/bind_fleet_executor.cc +++ b/paddle/fluid/pybind/bind_fleet_executor.cc @@ -65,6 +65,7 @@ struct npy_format_descriptor { namespace paddle { namespace pybind { +using paddle::distributed::DependType; using paddle::distributed::DistModel; using paddle::distributed::DistModelConfig; using paddle::distributed::DistModelDataBuf; @@ -164,18 +165,17 @@ void BindFleetExecutor(py::module* m) { .def( "run", &FleetExecutor::Run, py::call_guard()); + py::enum_(*m, "DependType") + .value("NORMAL", DependType::NORMAL) + .value("LOOP", DependType::LOOP) + .value("STOP_LOOP", DependType::STOP_LOOP); + py::class_(*m, "TaskNode") - .def(py::init()) .def(py::init()) .def(py::init&, int64_t, int64_t, - int64_t, int64_t>()) .def("task_id", &TaskNode::task_id) .def("add_upstream_task", &TaskNode::AddUpstreamTask) @@ -183,6 +183,7 @@ void BindFleetExecutor(py::module* m) { .def("set_run_pre_steps", &TaskNode::SetRunPerSteps) .def("set_run_at_offset", &TaskNode::SetRunAtOffset) .def("set_type", &TaskNode::SetType) + .def("set_cond_var_name", &TaskNode::SetCondVarName) .def("role", &TaskNode::role) .def("init", [](TaskNode& self) { self.Init(); }) .def("set_program", &TaskNode::SetProgram); diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 43ee2d479b0b76b0d6851fe2c1b58e06e977fb76..d2f622537216b3954298b60d1514b5781b1b5eb1 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -660,8 +660,16 
@@ PYBIND11_MODULE(libpaddle, m) { return oss.str(); }); - m.def("set_prim_enabled", &paddle::prim::PrimCommonUtils::SetPrimEnabled); - m.def("is_prim_enabled", &paddle::prim::PrimCommonUtils::IsPrimEnabled); + m.def("__set_bwd_prim_enabled", + &paddle::prim::PrimCommonUtils::SetBwdPrimEnabled); + m.def("_is_bwd_prim_enabled", + &paddle::prim::PrimCommonUtils::IsBwdPrimEnabled); + m.def("__set_fwd_prim_enabled", + &paddle::prim::PrimCommonUtils::SetFwdPrimEnabled); + m.def("_is_fwd_prim_enabled", + &paddle::prim::PrimCommonUtils::IsFwdPrimEnabled); + m.def("__set_all_prim_enabled", + &paddle::prim::PrimCommonUtils::SetAllPrimEnabled); m.def("set_num_threads", &platform::SetNumThreads); m.def("disable_signal_handler", &DisableSignalHandler); @@ -1264,8 +1272,9 @@ All parameter, weight, gradient are variables in Paddle. // priority of GradCompOpMaker is less than GradCompMaker for better // performance. std::vector> grad_op_descs; - if (paddle::prim::PrimCommonUtils::IsPrimEnabled()) { + if (paddle::prim::PrimCommonUtils::IsBwdPrimEnabled()) { if (grad_comp_op_maker != nullptr) { + VLOG(3) << "Runing composite fun for " << op_desc.Type(); grad_op_descs = grad_comp_op_maker(op_desc, no_grad_set, &grad_to_var, diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index f47e206c7ce2fe2742529382ef18092f92571cde..615008a8291c5599132386119ffbf985559a2f7b 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -42,7 +42,7 @@ kernel : func : add_grad no_need_buffer : x, y - composite : add_grad(Tensor x, Tensor y, Tensor out_grad, int axis) + composite : add_grad(x, y, out_grad, axis) backward : add_double_grad inplace : (out_grad -> x_grad) @@ -390,7 +390,7 @@ param : [x, y] kernel : func : divide_grad - composite : divide_grad(Tensor x, Tensor y, Tensor out, Tensor out_grad, int axis = -1) + composite : divide_grad(x, y, out, out_grad, -1) backward : divide_double_grad - backward_op : dropout_grad @@ -1319,7 +1319,7 @@ kernel : func : subtract_grad no_need_buffer : x, y - composite : subtract_grad(Tensor x, Tensor y, Tensor out_grad, int axis) + composite : subtract_grad(x, y, out_grad, axis) backward : subtract_double_grad inplace : (out_grad -> x_grad) diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index dbb70cb07aaeca1bb2a4245a7dcdf0d0415cf2bc..c398138e2d5fa06ae6c35ca7901bb925689dcbb9 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -112,42 +112,6 @@ static void AppendActivation(const OneDNNContext& dev_ctx, } } -static std::unordered_map GetAttributeMap( - std::string act_type) { - std::unordered_map attr_map; - if (act_type == "swish") { - attr_map.emplace("beta", "fuse_alpha"); - } else if (act_type == "relu6") { - attr_map.emplace("threshold", "fuse_alpha"); - } else if (act_type == "hard_sigmoid") { - attr_map.emplace("slope", "fuse_alpha"); - attr_map.emplace("offset", "fuse_beta"); - } else if (act_type == "clip") { - attr_map.emplace("min", "fuse_alpha"); - attr_map.emplace("max", "fuse_beta"); - } else { - attr_map.emplace("alpha", "fuse_alpha"); - attr_map.emplace("beta", "fuse_beta"); - } - return attr_map; -} - -static std::vector GetSupportedActivations() { - return std::vector{"abs", - "clip", - "gelu", - "hard_sigmoid", - "hard_swish", - "leaky_relu", - "mish", - "relu", - "relu6", - "sigmoid", - "sqrt", - "swish", - "tanh"}; -} - template TransposeAxis(const std::vector& x, auto 
axis_set = std::set(axis.begin(), axis.end()); PADDLE_ENFORCE_EQ(axis_set.size(), axis_size, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "In an axis array, elements must be unique.")); - PADDLE_ENFORCE_EQ(in_rank, - axis_size, - paddle::platform::errors::InvalidArgument( - "The input dimension's size " - "should be equal to the axis's size. " - "But received dimension is %d, " - "axis's size is %d", - in_rank, - axis_size)); + PADDLE_ENFORCE_EQ( + in_rank, + axis_size, + phi::errors::InvalidArgument("The input dimension's size " + "should be equal to the axis's size. " + "But received dimension is %d, " + "axis's size is %d", + in_rank, + axis_size)); PADDLE_ENFORCE_LT(*std::max_element(axis.begin(), axis.end()), axis_size, - paddle::platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Axis values must be ranging from 0 to (dims - 1).")); std::vector new_x(x.size()); diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc index 367231972acbcf6a504ab8dc36b20e5763cf9b0b..99cb79035b4b25378c4aae5fb8705d5226e80230 100644 --- a/paddle/phi/backends/xpu/xpu2_op_list.cc +++ b/paddle/phi/backends/xpu/xpu2_op_list.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at @@ -67,10 +67,7 @@ XPUOpMap& get_kl2_ops() { phi::DataType::INT64})}, {"bilinear_interp_v2", XPUKernelSet({phi::DataType::FLOAT32})}, {"bilinear_interp_v2_grad", XPUKernelSet({phi::DataType::FLOAT32})}, - {"bitwise_and", XPUKernelSet({phi::DataType::BOOL})}, {"bitwise_not", XPUKernelSet({phi::DataType::BOOL})}, - {"bitwise_or", XPUKernelSet({phi::DataType::BOOL})}, - {"bitwise_xor", XPUKernelSet({phi::DataType::BOOL})}, {"broadcast", XPUKernelSet({phi::DataType::FLOAT32})}, {"c_allgather", XPUKernelSet({phi::DataType::FLOAT16, @@ -109,6 +106,8 @@ XPUOpMap& get_kl2_ops() { XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"clip", XPUKernelSet({phi::DataType::FLOAT32})}, {"clip_by_norm", XPUKernelSet({phi::DataType::FLOAT32})}, + {"clip_grad", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::INT32})}, {"coalesce_tensor", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"concat_grad", @@ -374,6 +373,10 @@ XPUOpMap& get_kl2_ops() { phi::DataType::BOOL, phi::DataType::FLOAT16, phi::DataType::FLOAT32})}, + {"max_pool2d_with_index", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, + {"max_pool2d_with_index_grad", + XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"matmul_grad", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"matmul_v2_grad", @@ -435,7 +438,10 @@ XPUOpMap& get_kl2_ops() { {"reduce_min", XPUKernelSet({phi::DataType::FLOAT32})}, {"reduce_prod", XPUKernelSet({phi::DataType::FLOAT32})}, {"reduce_sum_grad", XPUKernelSet({phi::DataType::FLOAT32})}, - {"reduce_sum", XPUKernelSet({phi::DataType::FLOAT32})}, + {"reduce_sum", + XPUKernelSet({phi::DataType::FLOAT32, + phi::DataType::INT8, + phi::DataType::INT64})}, {"relu6", XPUKernelSet({phi::DataType::FLOAT32})}, {"relu6_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"relu_grad", diff --git a/paddle/phi/core/flags.cc b/paddle/phi/core/flags.cc index 8b5a78575d220d7d04c7f1046237d79053f8cb30..526457499c8844ddca40587eaba2ffa7d3202896 100644 --- 
a/paddle/phi/core/flags.cc +++ b/paddle/phi/core/flags.cc @@ -146,17 +146,17 @@ PADDLE_DEFINE_EXPORTED_bool( * CUDA related related FLAG * Name: FLAGS_gemm_use_half_precision_compute_type * Since Version: 2.4 - * Value Range: bool, default=true + * Value Range: bool, default=false * Example: * Note: whether to use fp16 compute type when the input and output is fp16, * faster but it may loss precision. */ PADDLE_DEFINE_EXPORTED_bool( gemm_use_half_precision_compute_type, - true, + false, "Whether to use fp16 compute type when the input and output is fp16, " "faster but it may loss precision in most case. If true, the compute " - "type will be set to fp32. Default is true."); + "type will be set to fp16. Default is false."); /** * CUDA related FLAG diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 5a7b2cf16a1f8cdc896da44193befe43674b23cd..55e895c6622a62d46aab018c7487d65c82c64d8c 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -4596,17 +4596,26 @@ void UniqueRawInferMeta(const MetaTensor& x, MetaTensor* index, MetaTensor* counts) { if (!is_sorted) { - PADDLE_ENFORCE_EQ( - x.dims().size(), - 1, - phi::errors::InvalidArgument("The Input(X) should be 1-D Tensor, " - "But now the dims of Input(X) is %d.", - x.dims().size())); + PADDLE_ENFORCE_EQ(x.dims().size() == 1 || x.dims().size() == 0, + true, + phi::errors::InvalidArgument( + "The Input(X) should be 0-D or 1-D Tensor, " + "But now the dims of Input(X) is %d.", + x.dims().size())); out->set_dims(phi::make_ddim({-1})); index->set_dims(x.dims()); return; } + if (x.dims().size() == 0) { + PADDLE_ENFORCE_EQ(axis.empty(), + true, + phi::errors::InvalidArgument( + "The Input(X) with 0-D Tensor, axis must be None" + "But now the axis is %d.", + axis[0])); + } + if (axis.empty()) { out->set_dims(phi::make_ddim({-1})); if (return_inverse) { diff --git a/paddle/phi/kernels/autotune/cache.h b/paddle/phi/kernels/autotune/cache.h index 5766ae8e8a4418b6369126bdeb906f3a6e2421ac..e58b1ff00c9a1720c59114059a894fdc566142db 100644 --- a/paddle/phi/kernels/autotune/cache.h +++ b/paddle/phi/kernels/autotune/cache.h @@ -43,14 +43,13 @@ enum class AlgorithmType { kConvForward = 1, kConvBackwardData = 2, kConvBackwardFilter = 3, + kTranspose = 4, #ifdef PADDLE_WITH_CUDNN_FRONTEND - kConvForwardV8 = 4, - kConvBackwardDataV8 = 5, - kConvBackwardFilterV8 = 6, - kTranspose = 7, + kConvForwardV8 = 5, + kConvBackwardDataV8 = 6, + kConvBackwardFilterV8 = 7, kAlgorithmCount = 8 #else - kTranspose = 4, kAlgorithmCount = 5 #endif }; diff --git a/paddle/phi/kernels/clip_grad_kernel.h b/paddle/phi/kernels/clip_grad_kernel.h index 8a7e5b99fd9248e752dc22d33695fde533b78016..bc6245ce90eabe2a472996e944fb3844697832a1 100644 --- a/paddle/phi/kernels/clip_grad_kernel.h +++ b/paddle/phi/kernels/clip_grad_kernel.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
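A quick note on the GEMM flag corrected above: with the new default (false), fp16 inputs accumulate in fp32 and the description now matches the behavior. The sketch below shows how a user could opt back into the faster fp16 compute type; it is not part of this patch and assumes the exported flag is reachable through `paddle.set_flags`, as `PADDLE_DEFINE_EXPORTED_bool` flags generally are.

.. code-block:: python

    import paddle

    # Default is now False: fp16 GEMMs keep an fp32 compute type for precision.
    # Explicitly re-enable the fp16 compute type if the precision loss is acceptable.
    paddle.set_flags({'FLAGS_gemm_use_half_precision_compute_type': True})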
diff --git a/paddle/phi/kernels/funcs/concat_and_split_functor.cu b/paddle/phi/kernels/funcs/concat_and_split_functor.cu index af2ab0a8a70a36aa0d69221d27916533f3d16afa..fa663528eb0158fa75f8d3e86f83115621491816 100644 --- a/paddle/phi/kernels/funcs/concat_and_split_functor.cu +++ b/paddle/phi/kernels/funcs/concat_and_split_functor.cu @@ -463,12 +463,17 @@ void DispatchConcatKernel(const phi::GPUContext& ctx, constexpr IndexT MaxVecSize = 16 / sizeof(T); bool find_vecsize_flag = false; IndexT dispatch_vec_size = 1; + + auto output_data = reinterpret_cast(output->data()); for (IndexT vec_size = MaxVecSize; vec_size > 0; vec_size /= 2) { - for (IndexT idx = 0; idx < in_num + 1; idx++) { + const IndexT mov_size = vec_size * sizeof(T); + for (IndexT idx = 1; idx < in_num + 1; idx++) { + auto input_data = reinterpret_cast(inputs_data[idx - 1]); // Since input_cols[0] is 0, we need to jump. - const IndexT input_col = inputs_col[idx + 1] - inputs_col[idx]; - if (input_col % vec_size == 0) { - if (idx == in_num - 1) { + const IndexT input_col = inputs_col[idx] - inputs_col[idx - 1]; + if (input_col % vec_size == 0 && output_data % mov_size == 0 && + input_data % mov_size == 0) { + if (idx == in_num) { find_vecsize_flag = true; } } else { diff --git a/paddle/phi/kernels/impl/solve_kernel_impl.h b/paddle/phi/kernels/impl/solve_kernel_impl.h index b0e6b2b6cc02591b1e3674da9ae6318cdddaadb7..d5ecfdff21a998138a779d65c45ace7321940483 100644 --- a/paddle/phi/kernels/impl/solve_kernel_impl.h +++ b/paddle/phi/kernels/impl/solve_kernel_impl.h @@ -169,7 +169,7 @@ static void linalg_solve(const Context& dev_ctx, out_tmp.Resize(out->dims()); out_tmp = *out; - phi::SqueezeInferKernel(dev_ctx, out_tmp, {-1}, out); + phi::Squeeze(dev_ctx, out_tmp, {-1}, out); } else { PADDLE_ENFORCE_EQ( x_dim[x_dim_size - 1], diff --git a/paddle/phi/kernels/onednn/matmul_grad_kernel.cc b/paddle/phi/kernels/onednn/matmul_grad_kernel.cc index fec008e7a106e0ccf4c6f17f9c75335e42350218..f9b45d4bc441df067cd0dad4ff89df90a3c484ab 100644 --- a/paddle/phi/kernels/onednn/matmul_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/matmul_grad_kernel.cc @@ -19,37 +19,64 @@ namespace phi { -std::vector ExtendDimsWithOnes(const std::vector &dims, - int new_size) { - std::vector new_dims(new_size, 1); - for (size_t i = 0; i < dims.size(); ++i) { - new_dims[new_size - dims.size() + i] = dims[i]; +void CalculateMatrixDims(const std::vector &x_dims, + const std::vector &y_dims, + const std::vector &out_dims, + std::vector *x_bd_dims, + std::vector *y_bd_dims, + std::vector *out_bd_dims, + bool trans_x, + bool trans_y) { + if (x_dims.size() == 1) { + (*x_bd_dims)[x_bd_dims->size() - 1] = x_dims[0]; + } else if (x_dims.size() == 2) { + (*x_bd_dims)[x_bd_dims->size() - 1] = x_dims[1]; + (*x_bd_dims)[x_bd_dims->size() - 2] = x_dims[0]; + } else { + for (size_t i = 0; i < x_dims.size(); ++i) { + (*x_bd_dims)[x_bd_dims->size() - x_dims.size() + i] = x_dims[i]; + } + } + if (y_dims.size() == 1) { + (*y_bd_dims)[x_bd_dims->size() - 2] = y_dims[0]; + } else if (y_dims.size() == 2) { + (*y_bd_dims)[y_bd_dims->size() - 1] = y_dims[1]; + (*y_bd_dims)[y_bd_dims->size() - 2] = y_dims[0]; + } else { + for (size_t i = 0; i < y_dims.size(); ++i) { + (*y_bd_dims)[y_bd_dims->size() - y_dims.size() + i] = y_dims[i]; + } + } + + for (size_t i = 0; i < x_bd_dims->size() - 2; ++i) { + (*out_bd_dims)[i] = std::max((*x_bd_dims)[i], (*y_bd_dims)[i]); } + int h_idx = trans_x ? x_bd_dims->size() - 1 : x_bd_dims->size() - 2; + int w_idx = trans_y ? 
y_bd_dims->size() - 2 : y_bd_dims->size() - 1; - return new_dims; + (*out_bd_dims)[x_bd_dims->size() - 2] = (*x_bd_dims)[h_idx]; + (*out_bd_dims)[y_bd_dims->size() - 1] = (*y_bd_dims)[w_idx]; } template void CalculateGradMatrixDims(const OneDNNContext &dev_ctx, DenseTensor *dx_tmp, DenseTensor *dy_tmp, - const std::vector &dx_dims, - const std::vector &dy_dims, std::vector *dx_bd_dims, std::vector *dy_bd_dims) { - for (size_t i = 0; i < dx_dims.size() - 2; ++i) { - if (dx_dims[i] != dy_dims[i]) { - if (dx_dims[i] == 1) { - (*dx_bd_dims)[i] = dy_dims[i]; + for (size_t i = 0; i < dx_bd_dims->size() - 2; ++i) { + if ((*dx_bd_dims)[i] != (*dy_bd_dims)[i]) { + if ((*dx_bd_dims)[i] == 1) { + (*dx_bd_dims)[i] = (*dy_bd_dims)[i]; } else { - (*dy_bd_dims)[i] = dx_dims[i]; + (*dy_bd_dims)[i] = (*dx_bd_dims)[i]; } } } - dx_tmp->Resize(make_ddim((*dx_bd_dims))); + dx_tmp->Resize(make_ddim(*dx_bd_dims)); dev_ctx.template Alloc(dx_tmp); - dy_tmp->Resize(make_ddim((*dy_bd_dims))); + dy_tmp->Resize(make_ddim(*dy_bd_dims)); dev_ctx.template Alloc(dy_tmp); } @@ -58,7 +85,7 @@ void ReduceSumForMatmulGradOutput(const OneDNNContext &dev_ctx, const DenseTensor *dx_tmp, DenseTensor *dx, const std::vector &dx_dims, - const std::vector &squeezed_dims) { + const std::vector &x_dims) { funcs::ReductionOneDNNHandler handler(dnnl::algorithm::reduction_sum, 0.0f, 0.0f, @@ -66,7 +93,7 @@ void ReduceSumForMatmulGradOutput(const OneDNNContext &dev_ctx, dev_ctx.GetPlace(), dx_tmp, dx, - dx_dims); + x_dims); auto src_memory_p = handler.AcquireSrcMemory(dx_tmp); auto dst_memory_p = handler.AcquireDstMemory(dx); @@ -79,8 +106,6 @@ void ReduceSumForMatmulGradOutput(const OneDNNContext &dev_ctx, reduction_p->execute(astream, reduction_args); astream.wait(); - - dx->set_mem_desc(dst_memory_p->get_desc().reshape(squeezed_dims)); } template @@ -99,64 +124,67 @@ void MatmulGradKernel(const Context &dev_ctx, size_t ndims = std::max(x_dims.size(), y_dims.size()); ndims = std::max(ndims, 3); - if (x_dims.size() != ndims) { - x_dims = ExtendDimsWithOnes(x_dims, ndims); - } - if (y_dims.size() != ndims) { - y_dims = ExtendDimsWithOnes(y_dims, ndims); - } - if (dout_dims.size() != ndims) { - dout_dims = ExtendDimsWithOnes(dout_dims, ndims); - } - // in broadcasting scenario new memory is required because // reduce sum must be calculated upon broadcasted dims DenseTensor dx_tmp, dy_tmp; - std::vector dx_bd_dims(x_dims); - std::vector dy_bd_dims(y_dims); + std::vector dout_bd_dims(ndims, 1); + std::vector x_bd_dims(ndims, 1); + std::vector y_bd_dims(ndims, 1); + + CalculateMatrixDims(x_dims, + y_dims, + dout_dims, + &x_bd_dims, + &y_bd_dims, + &dout_bd_dims, + transpose_x, + transpose_y); + + std::vector dx_bd_dims(x_bd_dims); + std::vector dy_bd_dims(y_bd_dims); CalculateGradMatrixDims( - dev_ctx, &dx_tmp, &dy_tmp, x_dims, y_dims, &dx_bd_dims, &dy_bd_dims); + dev_ctx, &dx_tmp, &dy_tmp, &dx_bd_dims, &dy_bd_dims); if (transpose_x && transpose_y) { funcs::ExecuteMatmul( - dev_ctx, y, dout, y_dims, dout_dims, true, true, &dx_tmp); + dev_ctx, y, dout, y_bd_dims, dout_bd_dims, true, true, &dx_tmp); funcs::ExecuteMatmul( - dev_ctx, dout, x, dout_dims, x_dims, true, true, &dy_tmp); + dev_ctx, dout, x, dout_bd_dims, x_bd_dims, true, true, &dy_tmp); } else if (transpose_x) { funcs::ExecuteMatmul( - dev_ctx, y, dout, y_dims, dout_dims, false, true, &dx_tmp); + dev_ctx, y, dout, y_bd_dims, dout_bd_dims, false, true, &dx_tmp); funcs::ExecuteMatmul( - dev_ctx, x, dout, x_dims, dout_dims, false, false, &dy_tmp); + dev_ctx, x, dout, x_bd_dims, 
dout_bd_dims, false, false, &dy_tmp); } else if (transpose_y) { funcs::ExecuteMatmul( - dev_ctx, dout, y, dout_dims, y_dims, false, false, &dx_tmp); + dev_ctx, dout, y, dout_bd_dims, y_bd_dims, false, false, &dx_tmp); funcs::ExecuteMatmul( - dev_ctx, dout, x, dout_dims, x_dims, true, false, &dy_tmp); + dev_ctx, dout, x, dout_bd_dims, x_bd_dims, true, false, &dy_tmp); } else { funcs::ExecuteMatmul( - dev_ctx, dout, y, dout_dims, y_dims, false, true, &dx_tmp); + dev_ctx, dout, y, dout_bd_dims, y_bd_dims, false, true, &dx_tmp); funcs::ExecuteMatmul( - dev_ctx, x, dout, x_dims, dout_dims, true, false, &dy_tmp); + dev_ctx, x, dout, x_bd_dims, dout_bd_dims, true, false, &dy_tmp); } - if (x_dims != dx_bd_dims) { + if (x_bd_dims != dx_bd_dims) { ReduceSumForMatmulGradOutput( - dev_ctx, &dx_tmp, dx, x_dims, vectorize(x.dims())); + dev_ctx, &dx_tmp, dx, dx_bd_dims, x_bd_dims); } else { *dx = std::move(dx_tmp); } - if (y_dims != dy_bd_dims) { + if (y_bd_dims != dy_bd_dims) { ReduceSumForMatmulGradOutput( - dev_ctx, &dy_tmp, dy, y_dims, vectorize(y.dims())); + dev_ctx, &dy_tmp, dy, dy_bd_dims, y_bd_dims); } else { *dy = std::move(dy_tmp); } + dx->set_mem_desc(x.mem_desc()); dx->Resize(x.dims()); - dx->set_mem_desc(x.mem_desc().reshape(vectorize(x.dims()))); + dy->set_mem_desc(y.mem_desc()); dy->Resize(y.dims()); - dy->set_mem_desc(y.mem_desc().reshape(vectorize(y.dims()))); } template diff --git a/paddle/phi/kernels/reduce_sum_kernel.cc b/paddle/phi/kernels/reduce_sum_kernel.cc index c6cfe42566372fe59303b30c09b84a15ab18cd39..a3ff565fce2734c804edcd220a5c90c4f7d68ffe 100644 --- a/paddle/phi/kernels/reduce_sum_kernel.cc +++ b/paddle/phi/kernels/reduce_sum_kernel.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,7 +27,8 @@ void SumKernel(const Context& dev_ctx, bool keep_dim, DenseTensor* out) { bool reduce_all = recompute_reduce_all(x, dims); - SumRawKernel(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out); + SumRawKernel( + dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out); } } // namespace phi @@ -82,5 +83,8 @@ PD_REGISTER_KERNEL( #endif #if defined(PADDLE_WITH_XPU) -PD_REGISTER_KERNEL(sum, XPU, ALL_LAYOUT, phi::SumKernel, float) {} +PD_REGISTER_KERNEL( + sum, XPU, ALL_LAYOUT, phi::SumKernel, float, int8_t, int64_t) { + kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); +} #endif diff --git a/paddle/phi/kernels/reduce_sum_kernel.h b/paddle/phi/kernels/reduce_sum_kernel.h index 3bcf025d96bc4c59d40e0a04b4546633583e6401..e994b073fca7fab185259896d55dcebfd046db65 100644 --- a/paddle/phi/kernels/reduce_sum_kernel.h +++ b/paddle/phi/kernels/reduce_sum_kernel.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
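The oneDNN matmul_grad rework above pads x, y, and dout to a common rank and then reduce-sums the raw gradients over any broadcast batch dimensions. The dygraph sketch below is only an illustration of the shape contract the kernel has to satisfy (it is not tied to the oneDNN path specifically).

.. code-block:: python

    import paddle

    x = paddle.randn([4, 5])        # no batch dimension
    y = paddle.randn([8, 5, 6])     # batch of 8
    x.stop_gradient = False
    y.stop_gradient = False

    out = paddle.matmul(x, y)       # x is broadcast: out.shape == [8, 4, 6]
    out.sum().backward()

    # The raw gradient w.r.t. x carries the broadcast batch dim of 8; the kernel
    # must reduce-sum it away so x.grad has exactly the shape of x.
    print(x.grad.shape)             # [4, 5]
    print(y.grad.shape)             # [8, 5, 6]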
diff --git a/paddle/phi/kernels/squeeze_kernel.cc b/paddle/phi/kernels/squeeze_kernel.cc index a95a8cc9a2ff206f926b09c19d4b7db392bfd379..d36e42c8126619b0d7d7716786ff2b6458a559a5 100644 --- a/paddle/phi/kernels/squeeze_kernel.cc +++ b/paddle/phi/kernels/squeeze_kernel.cc @@ -25,11 +25,7 @@ void SqueezeInferKernel(const Context& dev_ctx, const DenseTensor& x, const IntArray& axes, DenseTensor* out) { - auto x_dims = x.dims(); - std::vector tmp(axes.GetData().begin(), axes.GetData().end()); - auto out_dims = funcs::GetOutputSqueezeShape(tmp, x_dims, true); - out->Resize(out_dims); - + auto out_dims = out->dims(); dev_ctx.template Alloc(out); phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out); out->Resize(out_dims); // copy will reset the dims. diff --git a/paddle/phi/kernels/squeeze_kernel.h b/paddle/phi/kernels/squeeze_kernel.h index 8114969ea7de7d97c5e5e92ec9d4aefddeb6defe..fcd994de7bff40fe513515dff4e93ab1fa8a0853 100644 --- a/paddle/phi/kernels/squeeze_kernel.h +++ b/paddle/phi/kernels/squeeze_kernel.h @@ -17,6 +17,7 @@ #include "paddle/phi/common/int_array.h" #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/infermeta/unary.h" namespace phi { @@ -33,4 +34,14 @@ void SqueezeKernel(const Context& dev_ctx, DenseTensor* out, DenseTensor* xshape); +template +void Squeeze(const Context& dev_ctx, + const DenseTensor& x, + const IntArray& axes, + DenseTensor* out) { + MetaTensor meta_out(out); + SqueezeInferMeta(x, axes, &meta_out); + SqueezeInferKernel(dev_ctx, x, axes, out); +} + } // namespace phi diff --git a/paddle/phi/kernels/xpu/bitwise.cc b/paddle/phi/kernels/xpu/bitwise.cc index a897a37acd20cd2726a13dbf6b3f471a7c3b5531..019acf52f820593a783e37dd37b743674a02be31 100644 --- a/paddle/phi/kernels/xpu/bitwise.cc +++ b/paddle/phi/kernels/xpu/bitwise.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
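Stepping back to the squeeze changes above: the new `phi::Squeeze` helper runs `SqueezeInferMeta` to set the output dims before dispatching `SqueezeInferKernel`, and the solve kernel now uses it to drop the trailing dim it added for a 1-D right-hand side. A minimal sketch of the user-visible contract, not part of the patch:

.. code-block:: python

    import paddle

    A = paddle.to_tensor([[3.0, 1.0],
                          [1.0, 2.0]])
    b = paddle.to_tensor([9.0, 8.0])     # 1-D right-hand side

    # solve treats b as a column vector internally and squeezes the trailing
    # dim off the result, so x comes back 1-D like b.
    x = paddle.linalg.solve(A, b)
    print(x.shape)                       # [2]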
@@ -19,51 +19,18 @@ namespace phi { -template -void BitwiseAndKernel(const Context& ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - ctx.template Alloc(out); - int r = xpu::logical_and( - ctx.x_context(), x.data(), y.data(), out->data(), x.numel()); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise and"); -} - -template -void BitwiseOrKernel(const Context& ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - ctx.template Alloc(out); - int r = xpu::logical_or( - ctx.x_context(), x.data(), y.data(), out->data(), x.numel()); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise or"); -} - -template -void BitwiseXorKernel(const Context& ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - ctx.template Alloc(out); - int r = xpu::logical_xor( - ctx.x_context(), x.data(), y.data(), out->data(), x.numel()); - PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise xor"); -} - template void BitwiseNotKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) { + using XPUDataType = typename XPUTypeTrait::Type; ctx.template Alloc(out); - int r = - xpu::logical_not(ctx.x_context(), x.data(), out->data(), x.numel()); + int r = xpu::logical_not(ctx.x_context(), + reinterpret_cast(x.data()), + reinterpret_cast(out->data()), + x.numel()); PADDLE_ENFORCE_XDNN_SUCCESS(r, "bitwise not"); } } // namespace phi -PD_REGISTER_KERNEL(bitwise_and, XPU, ALL_LAYOUT, phi::BitwiseAndKernel, bool) {} -PD_REGISTER_KERNEL(bitwise_or, XPU, ALL_LAYOUT, phi::BitwiseOrKernel, bool) {} -PD_REGISTER_KERNEL(bitwise_xor, XPU, ALL_LAYOUT, phi::BitwiseXorKernel, bool) {} PD_REGISTER_KERNEL(bitwise_not, XPU, ALL_LAYOUT, phi::BitwiseNotKernel, bool) {} diff --git a/paddle/phi/kernels/xpu/clip_grad_kernel.cc b/paddle/phi/kernels/xpu/clip_grad_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..ff1cc216602d20405866738ad4bdefab4d35030d --- /dev/null +++ b/paddle/phi/kernels/xpu/clip_grad_kernel.cc @@ -0,0 +1,44 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/clip_grad_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +void ClipGradKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + const Scalar& min, + const Scalar& max, + DenseTensor* x_grad) { + ctx.template Alloc(x_grad); + using XPUDataType = typename XPUTypeTrait::Type; + int r = + xpu::clip_grad(ctx.x_context(), + reinterpret_cast(x.data()), + reinterpret_cast(out_grad.data()), + reinterpret_cast(x_grad->data()), + x.numel(), + min.to(), + max.to()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "clip_grad"); +} +} // namespace phi + +PD_REGISTER_KERNEL( + clip_grad, XPU, ALL_LAYOUT, phi::ClipGradKernel, float, int) {} diff --git a/paddle/phi/kernels/xpu/pool_grad_kernel.cc b/paddle/phi/kernels/xpu/pool_grad_kernel.cc index 349fe1a0f1d77e98fa0d6a116bbd698a02e86ff9..3ae139bdd498611e860f072e339a9271d6a40f2c 100644 --- a/paddle/phi/kernels/xpu/pool_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/pool_grad_kernel.cc @@ -104,7 +104,6 @@ void Pool2dGradKernel(const Context& ctx, } if (pooling_type == "max") { - // TODO(zhanghuan05) to bind max_pool2d_grad_indices xpu api r = xpu::max_pool2d_grad( ctx.x_context(), reinterpret_cast(x.data()), @@ -142,6 +141,67 @@ void Pool2dGradKernel(const Context& ctx, } PADDLE_ENFORCE_XDNN_SUCCESS(r, "pool2dgrad"); } + +template +void MaxPool2dWithIndexGradKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& mask, + const DenseTensor& dout, + const std::vector& kernel_size, + const std::vector& strides_t, + const std::vector& paddings_t, + bool global_pooling, + bool adaptive, + DenseTensor* dx) { + using XPUType = typename XPUTypeTrait::Type; + + ctx.template Alloc(dx); + auto input_grad = reinterpret_cast(dx->data()); + std::vector ksize(kernel_size); + std::vector strides(strides_t); + std::vector paddings(paddings_t); + const auto* index_data = mask.data(); + + PADDLE_ENFORCE_NOT_NULL(index_data, + errors::NotFound("index data should not be nullptr")); + PADDLE_ENFORCE_EQ( + ksize.size(), + 2, + phi::errors::InvalidArgument("The Pool2d XPU OP only support 2 " + "dimension pooling!, but received " + "%d-dimension pool kernel size", + ksize.size())); + global_pooling = global_pooling || (adaptive && (ksize[0] * ksize[1] == 1)); + if (global_pooling) { + for (size_t i = 0; i < ksize.size(); ++i) { + paddings[i] = 0; + ksize[i] = static_cast(dx->dims()[i + 2]); + } + } + const int n = dx->dims()[0]; + const int c = dx->dims()[1]; + const int in_h = dx->dims()[2]; + const int in_w = dx->dims()[3]; + auto output_grad = reinterpret_cast(dout.data()); + + int r = xpu::Error_t::SUCCESS; + // pass a nullptr as input to XDNN is fine as long as index_data exists + r = xpu::max_pool2d_grad(ctx.x_context(), + /*input*/ nullptr, + /*output*/ nullptr, + index_data, + output_grad, + input_grad, + n, + c, + in_h, + in_w, + ksize, + strides, + paddings, + true); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "max_pool2d_with_index_grad"); +} } // namespace phi PD_REGISTER_KERNEL(pool2d_grad, @@ -150,3 +210,9 @@ PD_REGISTER_KERNEL(pool2d_grad, phi::Pool2dGradKernel, float, phi::dtype::float16) {} +PD_REGISTER_KERNEL(max_pool2d_with_index_grad, + XPU, + ALL_LAYOUT, + phi::MaxPool2dWithIndexGradKernel, + float, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/xpu/pool_kernel.cc b/paddle/phi/kernels/xpu/pool_kernel.cc index 9278484378e41cc8d947c16ae6ddcf807f96e7a4..92a8d48d1a33347e32de8162f9c087a86a30f0ea 100644 --- 
a/paddle/phi/kernels/xpu/pool_kernel.cc +++ b/paddle/phi/kernels/xpu/pool_kernel.cc @@ -154,7 +154,72 @@ void Pool2dKernel(const Context& ctx, } PADDLE_ENFORCE_XDNN_SUCCESS(r, "pool2d"); } + +template +void MaxPool2dWithIndexKernel(const Context& ctx, + const DenseTensor& x, + const std::vector& kernel_size, + const std::vector& strides_t, + const std::vector& paddings_t, + bool global_pooling, + bool adaptive, + DenseTensor* out, + DenseTensor* mask) { + using XPUType = typename XPUTypeTrait::Type; + + ctx.template Alloc(mask); + auto* index_data = mask->data(); + + std::vector ksize(kernel_size); + std::vector strides(strides_t); + std::vector paddings(paddings_t); + + PADDLE_ENFORCE_EQ(ksize.size(), + 2, + phi::errors::InvalidArgument( + "The Pool2d XPU OP only support 2 dimension pooling!")); + PADDLE_ENFORCE_EQ(!adaptive || (ksize[0] * ksize[1] == 1), + true, + phi::errors::InvalidArgument( + "The Pool2d XPU OP does not support (adaptive == " + "true && output_size != 1)")); + global_pooling = global_pooling || (adaptive && (ksize[0] * ksize[1] == 1)); + if (global_pooling) { + for (size_t i = 0; i < ksize.size(); ++i) { + paddings[i] = 0; + ksize[i] = static_cast(x.dims()[i + 2]); + } + } + const int n = x.dims()[0]; + const int c = x.dims()[1]; + const int in_h = x.dims()[2]; + const int in_w = x.dims()[3]; + auto input = reinterpret_cast(x.data()); + ctx.template Alloc(out); + auto output = reinterpret_cast(out->data()); + int r = xpu::Error_t::SUCCESS; + r = xpu::max_pool2d(ctx.x_context(), + input, + output, + index_data, + n, + c, + in_h, + in_w, + ksize, + strides, + paddings, + true); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "max_pool2d_with_index"); +} } // namespace phi PD_REGISTER_KERNEL( pool2d, XPU, ALL_LAYOUT, phi::Pool2dKernel, float, phi::dtype::float16) {} + +PD_REGISTER_KERNEL(max_pool2d_with_index, + XPU, + ALL_LAYOUT, + phi::MaxPool2dWithIndexKernel, + float, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/xpu/reduce_sum_kernel.cc b/paddle/phi/kernels/xpu/reduce_sum_kernel.cc index ac13dc3de3e0dd9f38dee199fa8d2794bc19e00c..dd3abc7badbd332d6a627288deb7e019dbb1610f 100644 --- a/paddle/phi/kernels/xpu/reduce_sum_kernel.cc +++ b/paddle/phi/kernels/xpu/reduce_sum_kernel.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
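The XPU `max_pool2d_with_index` forward and backward kernels registered above back the mask-returning pooling API. The sketch below shows the call that exercises them, assuming `return_mask=True` routes to `max_pool2d_with_index` as it does on other backends; it is an illustration only.

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.randn([1, 3, 32, 32])
    x.stop_gradient = False

    # return_mask=True selects the max_pool2d_with_index op, which also returns
    # the flattened index of each maximum (the "mask").
    out, mask = F.max_pool2d(x, kernel_size=2, stride=2, return_mask=True)
    out.sum().backward()                 # exercises max_pool2d_with_index_grad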
@@ -46,4 +46,5 @@ void SumRawKernel(const Context& dev_ctx, } // namespace phi -PD_REGISTER_KERNEL(sum_raw, XPU, ALL_LAYOUT, phi::SumRawKernel, float) {} +PD_REGISTER_KERNEL( + sum_raw, XPU, ALL_LAYOUT, phi::SumRawKernel, float, int8_t, int64_t) {} diff --git a/paddle/phi/kernels/xpu/transpose_grad_kernel.cc b/paddle/phi/kernels/xpu/transpose_grad_kernel.cc index 9fce92b8262ab4c328096236eebcf159c795b9d3..2bd23ba2cdc15290d94fa0d3e284c9054efebe19 100644 --- a/paddle/phi/kernels/xpu/transpose_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/transpose_grad_kernel.cc @@ -26,6 +26,14 @@ void TransposeGradKernel(const Context& dev_ctx, DenseTensor* x_grad) { using XPUType = typename XPUTypeTrait::Type; dev_ctx.template Alloc(x_grad); + if (x_grad->numel() == 0) { + return; + } + if (axis.size() == 0) { + phi::Copy(dev_ctx, out_grad, dev_ctx.GetPlace(), false, x_grad); + return; + } + std::vector reversed_axis(axis); for (size_t i = 0; i < axis.size(); i++) { reversed_axis[axis[i]] = i; diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py index 9f0b6ac26927f68cc637e8dc5230c59fbe20aa37..6d0bc89296a28c8776764aa375c00d9144b7dc17 100644 --- a/python/paddle/amp/auto_cast.py +++ b/python/paddle/amp/auto_cast.py @@ -29,6 +29,7 @@ WHITE_LIST = { 'conv2d', 'matmul', 'matmul_v2', + 'max_pool2d_with_index', 'mul', 'fake_quantize_dequantize_abs_max', 'fake_quantize_dequantize_moving_average_abs_max', diff --git a/python/paddle/amp/grad_scaler.py b/python/paddle/amp/grad_scaler.py index 85e6f6efc6ba3ab9121067ccd9bf4a882fdfa2d0..b6a38e0e28589b072814cf0c7f494cb684d26e6f 100644 --- a/python/paddle/amp/grad_scaler.py +++ b/python/paddle/amp/grad_scaler.py @@ -18,7 +18,7 @@ from enum import Enum import numpy as np -from paddle import _legacy_C_ops +from paddle import _C_ops, _legacy_C_ops from paddle.fluid import core, in_dygraph_mode from paddle.fluid.data_feeder import check_type from paddle.fluid.dygraph import to_variable @@ -228,11 +228,9 @@ class AmpScaler: optimize_ops, params_grads = (None, None) - if self._found_inf: - self._cache_founf_inf = True - else: - optimize_ops, params_grads = optimizer.minimize(*args, **kwargs) - self._cache_founf_inf = False + optimizer._set_auxiliary_var('found_inf', self._found_inf) + optimize_ops, params_grads = optimizer.minimize(*args, **kwargs) + self._cache_founf_inf = optimizer._get_auxiliary_var('found_inf') if self._use_dynamic_loss_scaling: # uopdate the scale @@ -330,6 +328,9 @@ class AmpScaler: param_grads_fp16, self._temp_found_inf_fp16, ) + self._found_inf = _C_ops.bitwise_or( + self._found_inf, self._temp_found_inf_fp16 + ) if len(param_grads_bf16): _legacy_C_ops.check_finite_and_unscale( param_grads_bf16, @@ -338,6 +339,9 @@ class AmpScaler: param_grads_bf16, self._temp_found_inf_bf16, ) + self._found_inf = _C_ops.bitwise_or( + self._found_inf, self._temp_found_inf_bf16 + ) if len(param_grads_fp32): _legacy_C_ops.check_finite_and_unscale( param_grads_fp32, @@ -346,6 +350,9 @@ class AmpScaler: param_grads_fp32, self._temp_found_inf_fp32, ) + self._found_inf = _C_ops.bitwise_or( + self._found_inf, self._temp_found_inf_fp32 + ) else: if len(param_grads_fp16): _legacy_C_ops.check_finite_and_unscale( @@ -354,6 +361,9 @@ class AmpScaler: param_grads_fp16, self._temp_found_inf_fp16, ) + self._found_inf = _C_ops.bitwise_or( + self._found_inf, self._temp_found_inf_fp16 + ) if len(param_grads_bf16): _legacy_C_ops.check_finite_and_unscale( param_grads_bf16, @@ -361,6 +371,9 @@ class AmpScaler: param_grads_bf16, self._temp_found_inf_bf16, ) + 
self._found_inf = _C_ops.bitwise_or( + self._found_inf, self._temp_found_inf_bf16 + ) if len(param_grads_fp32): _legacy_C_ops.check_finite_and_unscale( param_grads_fp32, @@ -368,12 +381,9 @@ class AmpScaler: param_grads_fp32, self._temp_found_inf_fp32, ) - - self._found_inf = ( - self._temp_found_inf_fp16 - or self._temp_found_inf_bf16 - or self._temp_found_inf_fp32 - ) + self._found_inf = _C_ops.bitwise_or( + self._found_inf, self._temp_found_inf_fp32 + ) optimizer_state["state"] = OptimizerState.UNSCALED @@ -761,11 +771,9 @@ class GradScaler(AmpScaler): if optimizer_state["state"] is OptimizerState.INIT: self._unscale(optimizer) - if self._found_inf: - self._cache_founf_inf = True - else: - optimizer.step() - self._cache_founf_inf = False + optimizer._set_auxiliary_var('found_inf', self._found_inf) + optimizer.step() + self._cache_founf_inf = optimizer._get_auxiliary_var('found_inf') optimizer_state["state"] = OptimizerState.STEPPED diff --git a/python/paddle/distributed/fleet/fleet_executor_utils.py b/python/paddle/distributed/fleet/fleet_executor_utils.py index 8f9101c38f75b150b7807f1de53e327b10a92932..1ff4b99198a29818655e0e0441e68be3653fe5a1 100755 --- a/python/paddle/distributed/fleet/fleet_executor_utils.py +++ b/python/paddle/distributed/fleet/fleet_executor_utils.py @@ -26,24 +26,24 @@ class TaskNode: self, rank, max_run_times, - max_slot_times, role=None, node_type=None, task_id=0, ops=None, program=None, lazy_initialize=False, + cond_var_name=None, ): """ :param rank (int): Current rank of the task node. :param max_run_times (int): The max run times of the task node. - :param max_slot_times (int): The mas slot times of the task node. :param role (int): The role of the task node. (Will be removed in the future) :param node_type (str): The type of the task node. :param task_id (int): The id of task node. :param ops (list): A list of op.desc to init the task node. (Will be removed in the future) :param program (Program): An instance of Program to init the task node. :param lazy_initialize (bool): In user-defined task, the program may change adding feed/fetch op. As efficient consideration, the task node will have the C++ object later. + :param cond_var_name (string): Indicate the cond var name of while. """ assert (ops is not None) ^ ( program is not None @@ -54,10 +54,10 @@ class TaskNode: self.id = int(task_id) self.rank = rank self.max_run_times = max_run_times - self.max_slot_times = max_slot_times self.node_type = node_type self.program = program self.lazy_initialize = lazy_initialize + self.cond_var_name = cond_var_name self.run_pre_steps = None self.run_at_offset = None self.node = None @@ -69,11 +69,18 @@ class TaskNode: role is not None and task_id is not None ), "If init task node with ops, should provide `role` and `task_id`." 
self.node = core.TaskNode( - role, ops, rank, task_id, max_run_times, max_slot_times + role, + ops, + rank, + task_id, + max_run_times, ) else: self.node = core.TaskNode( - program.desc, rank, self.id, max_run_times, max_slot_times + program.desc, + rank, + self.id, + max_run_times, ) if self.node_type: self.node.set_type(self.node_type) @@ -85,7 +92,6 @@ class TaskNode: self.rank, self.id, self.max_run_times, - self.max_slot_times, ) if self.node_type: self.node.set_type(self.node_type) @@ -93,10 +99,12 @@ class TaskNode: self.node.set_run_pre_steps(self.run_pre_steps) if self.run_at_offset: self.node.set_run_at_offset(self.run_at_offset) + if self.cond_var_name: + self.node.set_cond_var_name(self.cond_var_name) for up in self.upstreams: - self.node.add_upstream_task(up[0], up[1]) + self.node.add_upstream_task(up[0], up[1], up[2]) for down in self.downstreams: - self.node.add_downstream_task(down[0], down[1]) + self.node.add_downstream_task(down[0], down[1], down[2]) self.lazy_initialize = False return self.node @@ -124,17 +132,21 @@ class TaskNode: else: self.node.set_run_at_offset(offset) - def add_upstream_task(self, upstream, buffer_size=2): + def add_upstream_task( + self, upstream, buffer_size=2, depend_type=core.DependType.NORMAL + ): if self.lazy_initialize: - self.upstreams.append((upstream, buffer_size)) + self.upstreams.append((upstream, buffer_size, depend_type)) else: - self.node.add_upstream_task(upstream, buffer_size) + self.node.add_upstream_task(upstream, buffer_size, depend_type) - def add_downstream_task(self, downstream, buffer_size=2): + def add_downstream_task( + self, downstream, buffer_size=2, depend_type=core.DependType.NORMAL + ): if self.lazy_initialize: - self.downstreams.append((downstream, buffer_size)) + self.downstreams.append((downstream, buffer_size, depend_type)) else: - self.node.add_downstream_task(downstream, buffer_size) + self.node.add_downstream_task(downstream, buffer_size, depend_type) def task_id(self): return self.id @@ -309,33 +321,28 @@ class FleetExecutorUtils: return task_node_map def construct_task_nodes_1f1b(self, program_map): - max_slot_times = int(self.max_run_times - self.coord['pp_idx']) cur_start_id = int(self.rank * self.num_of_functionality) lr_task_node = TaskNode( rank=self.rank, max_run_times=self.max_run_times, - max_slot_times=max_slot_times, program=program_map["lr"], task_id=cur_start_id, ) fwd_task_node = TaskNode( rank=self.rank, max_run_times=self.max_run_times, - max_slot_times=max_slot_times, program=program_map["fwd"], task_id=cur_start_id + 1, ) bwd_task_node = TaskNode( rank=self.rank, max_run_times=self.max_run_times, - max_slot_times=max_slot_times, program=program_map["bwd"], task_id=cur_start_id + 2, ) opt_task_node = TaskNode( rank=self.rank, max_run_times=self.max_run_times, - max_slot_times=max_slot_times, program=program_map["opt"], task_id=cur_start_id + 3, ) @@ -354,12 +361,10 @@ class FleetExecutorUtils: return task_id_to_rank def construct_task_nodes_1f1b_op_list(self, op_list_map): - max_slot_times = int(self.max_run_times - self.coord['pp_idx']) cur_start_id = int(self.rank * self.num_of_functionality) lr_task_node = TaskNode( rank=self.rank, max_run_times=self.max_run_times, - max_slot_times=max_slot_times, role=int(OpRole.Optimize.LRSched), ops=op_list_map["lr"], task_id=cur_start_id, @@ -369,7 +374,6 @@ class FleetExecutorUtils: fwd_task_node = TaskNode( rank=self.rank, max_run_times=self.max_run_times, - max_slot_times=max_slot_times, role=int(OpRole.Forward), ops=op_list_map["fwd"], 
task_id=cur_start_id + 1, @@ -378,7 +382,6 @@ class FleetExecutorUtils: bwd_task_node = TaskNode( rank=self.rank, max_run_times=self.max_run_times, - max_slot_times=max_slot_times, role=int(OpRole.Backward), ops=op_list_map["bwd"], task_id=cur_start_id + 2, @@ -387,7 +390,6 @@ class FleetExecutorUtils: opt_task_node = TaskNode( rank=self.rank, max_run_times=self.max_run_times, - max_slot_times=max_slot_times, role=int(OpRole.Optimize), ops=op_list_map["opt"], task_id=cur_start_id + 3, @@ -471,7 +473,6 @@ def origin(program, rank): rank=rank, node_type="Compute", max_run_times=1, - max_slot_times=1, ) task_id_to_rank = {task_node.task_id(): rank} return [task_node.task_node()], task_id_to_rank diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py index 144dc8b6586c3d49f8fba5569dbe6749e93f51f8..c12843f106562c7167978cbccdda8101d198a61f 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py @@ -41,11 +41,9 @@ class HybridParallelGradScaler: optimize_ops, params_grads = (None, None) - if self._found_inf: - self._cache_founf_inf = True - else: - optimize_ops, params_grads = optimizer.minimize(*args, **kwargs) - self._cache_founf_inf = False + optimizer._set_auxiliary_var('found_inf', self._found_inf) + optimize_ops, params_grads = optimizer.minimize(*args, **kwargs) + self._cache_founf_inf = optimizer._get_auxiliary_var('found_inf') if self._use_dynamic_loss_scaling: self._update() diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py index b1ab77796477790466f84baf1cb1a6afd7d279c4..361b421bbae4be7e1bf9c38b7228cfd677f946e6 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py @@ -19,10 +19,10 @@ from types import MethodType import numpy as np import paddle -from paddle import _legacy_C_ops +from paddle import _C_ops, _legacy_C_ops from paddle.common_ops_import import dygraph_only +from paddle.fluid import core from paddle.fluid.dygraph import to_variable -from paddle.framework import core from paddle.nn import clip @@ -231,6 +231,9 @@ def GroupShardedScaler(scaler): param_grads_fp16, temp_found_inf_fp16, ) + self._found_inf = _C_ops.bitwise_or( + self._found_inf, temp_found_inf_fp16 + ) if len(param_grads_fp32): _legacy_C_ops.check_finite_and_unscale( param_grads_fp32, @@ -238,15 +241,17 @@ def GroupShardedScaler(scaler): param_grads_fp32, temp_found_inf_fp32, ) + self._found_inf = _C_ops.bitwise_or( + self._found_inf, temp_found_inf_fp32 + ) - self._found_inf = 1 if temp_found_inf_fp16 or temp_found_inf_fp32 else 0 - is_found_inf = paddle.to_tensor([self._found_inf], dtype="int32") + self._found_inf = self._found_inf.cast("int32") paddle.distributed.all_reduce( - is_found_inf, op=paddle.distributed.ReduceOp.SUM, group=None + self._found_inf, op=paddle.distributed.ReduceOp.MAX, group=None ) - self._found_inf = is_found_inf.numpy()[0] + self._found_inf = self._found_inf.cast("bool") scaler._unscale = MethodType(unscale_method, scaler) return scaler diff --git a/python/paddle/distributed/fleet/scaler.py b/python/paddle/distributed/fleet/scaler.py index 
003265a86123fa6383ec9b9bc4e3c16064d1fc49..a06b73fd0c3ff86e2e5685f4c3ef9a5064445ba4 100755 --- a/python/paddle/distributed/fleet/scaler.py +++ b/python/paddle/distributed/fleet/scaler.py @@ -17,7 +17,7 @@ from types import MethodType import numpy as np import paddle -from paddle import _legacy_C_ops +from paddle import _C_ops, _legacy_C_ops from paddle.distributed import fleet from paddle.fluid.dygraph import to_variable from paddle.framework import core @@ -73,6 +73,9 @@ def distributed_scaler(scaler): param_grads_fp16, temp_found_inf_fp16, ) + self._found_inf = _C_ops.bitwise_or( + self._found_inf, temp_found_inf_fp16 + ) if len(param_grads_fp32): _legacy_C_ops.check_finite_and_unscale( param_grads_fp32, @@ -80,17 +83,19 @@ def distributed_scaler(scaler): param_grads_fp32, temp_found_inf_fp32, ) + self._found_inf = _C_ops.bitwise_or( + self._found_inf, temp_found_inf_fp32 + ) - self._found_inf = 1 if temp_found_inf_fp16 or temp_found_inf_fp32 else 0 - is_found_inf = paddle.to_tensor([self._found_inf], dtype="int32") + self._found_inf = self._found_inf.cast("int32") # TODO(shenliang03) Since dp allreduce in the optimizer is # after the gradscaler, check_finite needs to synchronize global # information. In the future, we should use check_group to speed. paddle.distributed.all_reduce( - is_found_inf, op=paddle.distributed.ReduceOp.MAX, group=None + self._found_inf, op=paddle.distributed.ReduceOp.MAX, group=None ) - self._found_inf = is_found_inf.numpy()[0] + self._found_inf = self._found_inf.cast("bool") # Only data_parallel doesn't need to modify scaler fleet_env = fleet.fleet diff --git a/python/paddle/fft.py b/python/paddle/fft.py index 74b3bb23fc683e6c5616fed085e0c4280e5a1ad0..b8939d08b588b7cae38a251a4b188219af7ab9cb 100644 --- a/python/paddle/fft.py +++ b/python/paddle/fft.py @@ -1275,6 +1275,8 @@ def fftfreq(n, d=1.0, dtype=None, name=None): # Tensor(shape=[5], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # [ 0. , 0.40000001, 0.80000001, -0.80000001, -0.40000001]) """ + if d * n == 0: + raise ValueError("d or n should not be 0.") dtype = paddle.framework.get_default_dtype() val = 1.0 / (n * d) diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 76401d5c47a9aba70631a88ad05edf4b37db2f79..5169f9f085fe24d7611fd5181bc00366782f72df 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -1493,14 +1493,15 @@ def _append_backward_ops_( # remove some backward ops # TODO(Jiabin): Support this in prime later, it will prune add_grad, fix this problem - if not core.is_prim_enabled(): + if not core._is_bwd_prim_enabled(): not_need_ops = _find_not_need_ops( grad_op_descs, ops, input_grad_names_set ) - grad_op_descs = [ op_desc for op_desc in grad_op_descs if op_desc not in not_need_ops ] + else: + logging.debug("Runing backward composite and disable find_not_need_ops") # append op_desc in grad_op_descs to target_block op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index 7ab225c8c9f825d60c7e910be92f0190106f3860..b836dfa451c33b57c873f5fcc019b40985d005d4 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -98,10 +98,12 @@ def fused_embedding_seq_pool( .. 
code-block:: python import numpy as np import paddle.fluid as fluid + import paddle + paddle.enable_static() dict_size = 20 - data_t = fluid.layers.data( - name='word', shape=[1], dtype='int64', lod_level=1) + data_t = paddle.static.data( + name='word', shape=[-1, 1], dtype='int64', lod_level=1) padding_idx = np.random.randint(1, 10) out = fluid.contrib.fused_embedding_seq_pool( input=data_t, @@ -305,11 +307,13 @@ def multiclass_nms2( import paddle.fluid as fluid - boxes = fluid.layers.data(name='bboxes', shape=[81, 4], + import paddle + paddle.enable_static() + boxes = paddle.static.data(name='bboxes', shape=[-1, 81, 4], dtype='float32', lod_level=1) - scores = fluid.layers.data(name='scores', shape=[81], + scores = paddle.static.data(name='scores', shape=[-1, 81], dtype='float32', lod_level=1) - out, index = fluid.layers.multiclass_nms2(bboxes=boxes, + out, index = fluid.contrib.layers.multiclass_nms2(bboxes=boxes, scores=scores, background_label=0, score_threshold=0.5, @@ -501,7 +505,9 @@ def shuffle_batch(x, seed=None): .. code-block:: python import paddle.fluid as fluid - x = fluid.layers.data(name="x", shape=[-1, 4]) + import paddle + paddle.enable_static() + x = paddle.static.data(name="x", shape=[-1, 4]) out = fluid.contrib.layers.shuffle_batch(x) """ helper = LayerHelper('shuffle_batch', **locals()) @@ -1313,7 +1319,7 @@ def _pull_box_extended_sparse(input, size, extend_size=64, dtype='float32'): Examples: .. code-block:: python import paddle.fluid as fluid - data = fluid.layers.data(name='sequence', shape=[1], dtype='int64', lod_level=1) + data = paddle.static.data(name='sequence', shape=[-1, 1], dtype='int64', lod_level=1) emb, emb_ex = fluid.contrib.layers._pull_box_extended_sparse(input=data, size=8, extend_size=128) """ helper = LayerHelper('pull_box_extended_sparse', **locals()) @@ -1438,15 +1444,14 @@ def correlation( .. 
code-block:: python import paddle.fluid as fluid - - x1 = fluid.layers.data(name='x1', - shape=x_shape, - dtype=x_type, - append_batch_size=False) - x2 = fluid.layers.data(name='x2', - shape=x_shape, - dtype=x_type, - append_batch_size=False) + import paddle + paddle.enable_static() + x1 = paddle.static.data(name='x1', + shape=[2,3,4,5], + dtype="float32") + x2 = paddle.static.data(name='x2', + shape=[2,3,4,5], + dtype="float32") out = fluid.contrib.correlation( @@ -1555,8 +1560,8 @@ def fused_bn_add_act( # required: gpu def build_program(main_program, startup_program): with fluid.program_guard(main_program, startup_program): - x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') - y = fluid.layers.data(name="y", shape=[1], dtype='int64') + x = paddle.static.data(name='x', shape=[-1, 1, 28, 28], dtype='float32') + y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') conv1_1 = paddle.static.nn.conv2d( input=x, filter_size=3, diff --git a/python/paddle/fluid/contrib/tests/test_correlation.py b/python/paddle/fluid/contrib/tests/test_correlation.py index 4e9ef9b0fe8f554e71ce98c8d7b33661abc2fd6c..0f614f4324819a8bbaba8f7b6a85aae7d04e0249 100644 --- a/python/paddle/fluid/contrib/tests/test_correlation.py +++ b/python/paddle/fluid/contrib/tests/test_correlation.py @@ -85,20 +85,20 @@ class TestCorrelationOp(unittest.TestCase): np.set_printoptions(threshold=np.inf) x_shape = (2, 10, 3, 3) x_type = 'float32' - x1 = fluid.layers.data( + x1 = paddle.static.data( name='x1', shape=x_shape, dtype=x_type, - append_batch_size=False, - stop_gradient=False, ) - x2 = fluid.layers.data( + x1.desc.set_need_check_feed(False) + x1.stop_gradient = False + x2 = paddle.static.data( name='x2', shape=x_shape, dtype=x_type, - append_batch_size=False, - stop_gradient=False, ) + x2.desc.set_need_check_feed(False) + x2.stop_gradient = False x1_np = np.random.randn(2, 3, 4, 5).astype(x_type) x2_np = np.random.randn(2, 3, 4, 5).astype(x_type) diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py index f99d640799764af631d02a2faef170a46de971e6..5ab54bc7161780859445e31bd3086654a9ff09c5 100644 --- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py +++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py @@ -110,10 +110,10 @@ def train(net_type, use_cuda, save_dirname, is_local): train_program.random_seed = 123 startup_prog.random_seed = 456 with fluid.program_guard(train_program, startup_prog): - images = fluid.layers.data( - name='pixel', shape=data_shape, dtype='float32' + images = paddle.static.data( + name='pixel', shape=[-1] + data_shape, dtype='float32' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') if net_type == "vgg": print("train vgg net") @@ -444,11 +444,11 @@ class TestAmpWithNonIterableDataLoader(unittest.TestCase): start_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, start_prog): with paddle.fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32' + image = paddle.static.data( + name='image', shape=[-1, 3, 224, 224], dtype='float32' ) - label = fluid.layers.data( - name='label', shape=[1], dtype='int64' + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) py_reader = fluid.io.DataLoader.from_generator( feed_list=[image, label], diff --git 
a/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py b/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py index 7254dd9df31821c2c8653676f22c98030943aa2b..a97638536891873245ab249ce16f7b89ab4eebd5 100644 --- a/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py +++ b/python/paddle/fluid/contrib/tests/test_model_cast_to_bf16.py @@ -96,14 +96,22 @@ class TestModelCastBF16(unittest.TestCase): nn_bf16 = amp.bf16.convert_float_to_uint16(nn) with self.static_graph(): - t_bf16 = layers.data( - name='t_bf16', shape=[size, size], dtype=np.int32 + t_bf16 = paddle.static.data( + name='t_bf16', shape=[-1, size, size], dtype='int32' ) - tt_bf16 = layers.data( - name='tt_bf16', shape=[size, size], dtype=np.int32 + t_bf16.desc.set_need_check_feed(False) + tt_bf16 = paddle.static.data( + name='tt_bf16', shape=[-1, size, size], dtype='int32' ) - t = layers.data(name='t', shape=[size, size], dtype='float32') - tt = layers.data(name='tt', shape=[size, size], dtype='float32') + tt_bf16.desc.set_need_check_feed(False) + t = paddle.static.data( + name='t', shape=[-1, size, size], dtype='float32' + ) + t.desc.set_need_check_feed(False) + tt = paddle.static.data( + name='tt', shape=[-1, size, size], dtype='float32' + ) + tt.desc.set_need_check_feed(False) ret = paddle.add(t, tt) ret = paddle.multiply(ret, t) @@ -143,8 +151,14 @@ class TestModelCastBF16(unittest.TestCase): ) with self.static_graph(): - t = layers.data(name='t', shape=[size, size], dtype='float32') - tt = layers.data(name='tt', shape=[size, size], dtype='float32') + t = paddle.static.data( + name='t', shape=[-1, size, size], dtype='float32' + ) + t.desc.set_need_check_feed(False) + tt = paddle.static.data( + name='tt', shape=[-1, size, size], dtype='float32' + ) + tt.desc.set_need_check_feed(False) with amp.bf16.bf16_guard(): ret = paddle.add(t, tt) diff --git a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py index 1d4873817ab3a2b1613dea3ec578fc0cf48e9ab5..75754fb8bb6a88706393e20578e369958608e346 100644 --- a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py +++ b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py @@ -102,10 +102,10 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""): train_program.random_seed = 123 startup_prog.random_seed = 456 with fluid.program_guard(train_program, startup_prog): - images = fluid.layers.data( - name='pixel', shape=data_shape, dtype='float32' + images = paddle.static.data( + name='pixel', shape=[-1] + data_shape, dtype='float32' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') net = resnet_cifar10(images) logits = paddle.static.nn.fc(x=net, size=classdim, activation="softmax") cost = paddle.nn.functional.softmax_with_cross_entropy( @@ -275,11 +275,11 @@ class TestAmpWithNonIterableDataLoader(unittest.TestCase): start_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, start_prog): with paddle.fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32' + image = paddle.static.data( + name='image', shape=[-1, 3, 224, 224], dtype='float32' ) - label = fluid.layers.data( - name='label', shape=[1], dtype='int64' + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) py_reader = fluid.io.DataLoader.from_generator( feed_list=[image, label], diff --git 
a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py
index 7af54b7d1573830ebdb244ba520bccf76ad67638..870aceb5571aa892e160d56ebcea88f4a9cccaed 100644
--- a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py
+++ b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py
@@ -134,10 +134,12 @@ class TestWeightDecay(unittest.TestCase):
         startup_prog = fluid.framework.Program()
         with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog):
-            data = fluid.layers.data(
-                name="words", shape=[1], dtype="int64", lod_level=1
+            data = paddle.static.data(
+                name="words", shape=[-1, 1], dtype="int64", lod_level=1
+            )
+            label = paddle.static.data(
+                name="label", shape=[-1, 1], dtype="int64"
             )
-            label = fluid.layers.data(name="label", shape=[1], dtype="int64")
             avg_cost = model(data, label, self.word_dict_len)
             AdamW = fluid.contrib.extend_with_decoupled_weight_decay(
                 fluid.optimizer.Adam
@@ -158,10 +160,12 @@ class TestWeightDecay(unittest.TestCase):
         startup_prog = fluid.framework.Program()
         with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog):
-            data = fluid.layers.data(
-                name="words", shape=[1], dtype="int64", lod_level=1
+            data = paddle.static.data(
+                name="words", shape=[-1, 1], dtype="int64", lod_level=1
+            )
+            label = paddle.static.data(
+                name="label", shape=[-1, 1], dtype="int64"
             )
-            label = fluid.layers.data(name="label", shape=[1], dtype="int64")
             avg_cost = model(data, label, self.word_dict_len)
diff --git a/python/paddle/fluid/core.py b/python/paddle/fluid/core.py
index 771caa4ef3c4fa822dd40d0077b90e778bbbb5d6..b17c29a97868aab5d6efd0d96dfa26c97e2245e5 100644
--- a/python/paddle/fluid/core.py
+++ b/python/paddle/fluid/core.py
@@ -17,6 +17,7 @@ import sys
 import os
 import warnings
 import platform
+import logging
 
 has_paddle_dy_lib = False
@@ -305,8 +306,13 @@ try:
     from .libpaddle import _Profiler, _ProfilerResult, _RecordEvent
     from .libpaddle import _set_current_stream
     from .libpaddle import _get_phi_kernel_name
-    from .libpaddle import set_prim_enabled
-    from .libpaddle import is_prim_enabled
+
+    # prim controller flags
+    from .libpaddle import __set_bwd_prim_enabled
+    from .libpaddle import _is_bwd_prim_enabled
+    from .libpaddle import __set_fwd_prim_enabled
+    from .libpaddle import _is_fwd_prim_enabled
+    from .libpaddle import __set_all_prim_enabled
 
     if sys.platform != 'win32':
         from .libpaddle import _set_process_pids
@@ -373,36 +379,98 @@ def set_paddle_lib_path():
 set_paddle_lib_path()
 
+
+# Three FLAGS control whether prim is enabled:
+# FLAGS_prim_forward: enable or disable the forward prim strategy
+# FLAGS_prim_backward: enable or disable the backward prim strategy
+# FLAGS_prim_all: enable or disable both strategies at once
+#
+#
+# Priorities:
+# if with CINN and Dy2St:
+# # # _set_prim_all_enabled > FLAGS_prim_all > check_and_set_prim_all_enabled == _set_prim_forward_enabled == _set_prim_backward_enabled > FLAGS_prim_forward == FLAGS_prim_backward
+# else:
+# # # _set_prim_all_enabled > FLAGS_prim_all == check_and_set_prim_all_enabled == _set_prim_forward_enabled == _set_prim_backward_enabled > FLAGS_prim_forward == FLAGS_prim_backward
+def __sync_stat_with_flag(flag):
+    if flag == "FLAGS_prim_forward":
+        flag_value = os.getenv("FLAGS_prim_forward")
+        assert flag_value is not None
+        flag_value = flag_value.lower()
+        if flag_value == "false":
+            __set_fwd_prim_enabled(False)
+        elif flag_value == "true":
+            __set_fwd_prim_enabled(True)
+        else:
+            raise TypeError(f"flag {flag} should be true or false.")
+        logging.debug("forward prim enabled: %s", bool(_is_fwd_prim_enabled()))
+    elif flag == "FLAGS_prim_backward":
+        flag_value = os.getenv("FLAGS_prim_backward")
+        assert flag_value is not None
+        flag_value = flag_value.lower()
+        if flag_value == "false":
+            __set_bwd_prim_enabled(False)
+        elif flag_value == "true":
+            __set_bwd_prim_enabled(True)
+        else:
+            raise TypeError(f"flag {flag} should be true or false.")
+        logging.debug("backward prim enabled: %s", bool(_is_bwd_prim_enabled()))
+    elif flag == "FLAGS_prim_all":
+        flag_value = os.getenv("FLAGS_prim_all")
+        assert flag_value is not None
+        flag_value = flag_value.lower()
+        if flag_value == "false":
+            __set_all_prim_enabled(False)
+        elif flag_value == "true":
+            __set_all_prim_enabled(True)
+        else:
+            raise TypeError(f"flag {flag} should be true or false.")
+        logging.debug(
+            "all prim enabled: %s",
+            bool(_is_fwd_prim_enabled() and _is_bwd_prim_enabled()),
+        )
+    else:
+        raise TypeError(
+            f"We only support FLAGS_prim_forward/FLAGS_prim_backward/FLAGS_prim_all but we got {flag}."
+        )
 
-def set_prim_forward(value):
-    """set flag FLAGS_prim_forward."""
-    flag = str(value)
-    if flag.lower() not in ["true", "false", "debug"]:
-        raise TypeError(f"flag {flag} should be string of bool or 'debug'.")
-    os.environ["FLAGS_prim_forward"] = flag
-    return
 
+def _set_prim_backward_enabled(value):
+    __set_bwd_prim_enabled(bool(value))
+    logging.debug("backward prim enabled: %s", bool(_is_bwd_prim_enabled()))
 
-def enable_prim_forward():
-    flag = os.getenv("FLAGS_prim_forward", "true").lower()
-    if flag == "false":
-        return False
-    if flag == "debug":
-        return "debug"
-    return True
 
+def _set_prim_forward_enabled(value):
+    __set_fwd_prim_enabled(bool(value))
+    logging.debug("forward prim enabled: %s", bool(_is_fwd_prim_enabled()))
 
-def set_prim_backward(value):
-    """set flag FLAGS_prim_backward,"""
-    flag = str(value)
-    if flag.lower() not in ["true", "false"]:
-        raise TypeError(f"flag {flag} should be bool or string of bool.")
-    os.environ["FLAGS_prim_backward"] = flag
-    return
 
+def _set_prim_all_enabled(value):
+    __set_all_prim_enabled(bool(value))
+    logging.debug(
+        "all prim enabled: %s",
+        bool(_is_fwd_prim_enabled() and _is_bwd_prim_enabled()),
+    )
 
-def enable_prim_backward():
-    flag = os.getenv("FLAGS_prim_backward", "true")
-    if flag.lower() == "false":
-        return False
-    return True
+
+def __sync_prim_backward_status():
+    flag_value = os.getenv("FLAGS_prim_backward")
+    if flag_value is None:
+        logging.debug("backward prim enabled: %s", bool(_is_bwd_prim_enabled()))
+    else:
+        __sync_stat_with_flag("FLAGS_prim_backward")
+
+
+def __sync_prim_forward_status():
+    flag_value = os.getenv("FLAGS_prim_forward")
+    if flag_value is None:
+        logging.debug("forward prim enabled: %s", bool(_is_fwd_prim_enabled()))
+    else:
+        __sync_stat_with_flag("FLAGS_prim_forward")
+
+
+def check_and_set_prim_all_enabled():
+    flag_value = os.getenv("FLAGS_prim_all")
+    if flag_value is None:
+        __sync_prim_backward_status()
+        __sync_prim_forward_status()
+    else:
+        __sync_stat_with_flag("FLAGS_prim_all")
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index e3376d8446586607947bf2143fe5c9fe32115dac..2822a87a02172e22c2dc7c6899b7db4f5a7eea0f 100755
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -1862,7 +1862,6 @@ class Executor:
             vardesc = global_block.desc.find_var(varname.encode())
             varobj = global_block.vars[varname]
-            # Can not check var build by fluid.layers.data(), bucause fluid.layers.data() had not set need_check_feed
             if (
                 vardesc.persistable() == False
                 and vardesc.type() == core.VarDesc.VarType.LOD_TENSOR
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index da2fa96c758bae003952b6c2181b35974231c0bd..8b019114a322b9fd19e18f499e60d574002c7dd9 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -6957,9 +6957,10 @@ class Parameter(Variable, metaclass=ParameterMetaClass):
             .. code-block:: python
 
             import paddle.fluid as fluid
+            import paddle
             prog = fluid.default_main_program()
-            rlt = fluid.layers.data("fake_data", shape=[1,1], dtype='float32')
+            rlt = paddle.static.data("fake_data", shape=[-1,1,1], dtype='float32')
             debug_str = prog.to_string(throw_on_error=True, with_details=False)
             print(debug_str)
         """
diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/.gitignore b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..cff2988658f2e93376d500883e52339a61e04387
--- /dev/null
+++ b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/.gitignore
@@ -0,0 +1 @@
+ps_pb2.py
diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/ps_pb2.py b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/ps_pb2.py
deleted file mode 100644
index 4291115b0bcbb679e9d6676b161323e363dbe382..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/ps_pb2.py
+++ /dev/null
@@ -1,3262 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
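
Aside from the generated ps_pb2.py removal above, the prim controller helpers added to python/paddle/fluid/core.py earlier in this patch can be driven either programmatically or through the FLAGS_prim_* environment variables. The snippet below is an illustrative sketch only, not part of the patch, and assumes a Paddle build that exports the new libpaddle bindings:

    import os
    from paddle.fluid import core

    # Programmatic switches take priority over the environment flags.
    core._set_prim_all_enabled(True)        # enable forward + backward prim
    core._set_prim_backward_enabled(False)  # then selectively disable backward
    print(core._is_fwd_prim_enabled(), core._is_bwd_prim_enabled())  # True False

    # Environment-driven path: when FLAGS_prim_all is set, it overrides the
    # per-direction flags once check_and_set_prim_all_enabled() syncs state.
    os.environ["FLAGS_prim_all"] = "true"
    core.check_and_set_prim_all_enabled()   # re-enables both directions
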
-# source: ps.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode('latin1')) -from google.protobuf.internal import enum_type_wrapper -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - -DESCRIPTOR = _descriptor.FileDescriptor( - name='ps.proto', - package='paddle', - syntax='proto2', - serialized_pb=_b( - '\n\x08ps.proto\x12\x06paddle\"\xb5\x02\n\x0bPSParameter\x12\x14\n\x0cworker_class\x18\x01 \x01(\t\x12\x14\n\x0cserver_class\x18\x02 \x01(\t\x12\x16\n\x0einstance_class\x18\x03 \x01(\t\x12\x15\n\x0binit_gflags\x18\x04 \x01(\t:\x00\x12-\n\x0cworker_param\x18\x65 \x01(\x0b\x32\x17.paddle.WorkerParameter\x12-\n\x0cserver_param\x18\x66 \x01(\x0b\x32\x17.paddle.ServerParameter\x12\x38\n\rtrainer_param\x18\xad\x02 \x03(\x0b\x32 .paddle.DownpourTrainerParameter\x12\x33\n\x0f\x66s_client_param\x18\xf5\x03 \x01(\x0b\x32\x19.paddle.FsClientParameter\"Q\n\x0fWorkerParameter\x12>\n\x15\x64ownpour_worker_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourWorkerParameter\"Q\n\x0fServerParameter\x12>\n\x15\x64ownpour_server_param\x18\x01 \x01(\x0b\x32\x1f.paddle.DownpourServerParameter\"O\n\x17\x44ownpourWorkerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\"\xfd\x01\n\x18\x44ownpourTrainerParameter\x12\x30\n\x0b\x64\x65nse_table\x18\x01 \x03(\x0b\x32\x1b.paddle.DenseTableParameter\x12\x32\n\x0csparse_table\x18\x02 \x03(\x0b\x32\x1c.paddle.SparseTableParameter\x12\x1d\n\x15push_sparse_per_batch\x18\x03 \x01(\x05\x12\x1c\n\x14push_dense_per_batch\x18\x04 \x01(\x05\x12\x0f\n\x07skip_op\x18\x05 \x03(\t\x12-\n\x0eprogram_config\x18\x06 \x03(\x0b\x32\x15.paddle.ProgramConfig\"\x99\x01\n\rProgramConfig\x12\x12\n\nprogram_id\x18\x01 \x02(\t\x12\x1c\n\x14push_sparse_table_id\x18\x02 \x03(\x05\x12\x1b\n\x13push_dense_table_id\x18\x03 \x03(\x05\x12\x1c\n\x14pull_sparse_table_id\x18\x04 \x03(\x05\x12\x1b\n\x13pull_dense_table_id\x18\x05 \x03(\x05\"{\n\x13\x44\x65nseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x1b\n\x13\x64\x65nse_variable_name\x18\x02 \x03(\t\x12$\n\x1c\x64\x65nse_gradient_variable_name\x18\x03 \x03(\t\x12\x0f\n\x07\x66\x65\x61_dim\x18\x04 \x01(\x05\"z\n\x14SparseTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x66\x65\x61ture_dim\x18\x02 \x01(\x05\x12\x10\n\x08slot_key\x18\x03 \x03(\t\x12\x12\n\nslot_value\x18\x04 \x03(\t\x12\x15\n\rslot_gradient\x18\x05 \x03(\t\"\x86\x01\n\x17\x44ownpourServerParameter\x12\x34\n\x14\x64ownpour_table_param\x18\x01 \x03(\x0b\x32\x16.paddle.TableParameter\x12\x35\n\rservice_param\x18\x02 \x01(\x0b\x32\x1e.paddle.ServerServiceParameter\"\xd7\x01\n\x16ServerServiceParameter\x12*\n\x0cserver_class\x18\x01 \x01(\t:\x14\x44ownpourBrpcPsServer\x12*\n\x0c\x63lient_class\x18\x02 \x01(\t:\x14\x44ownpourBrpcPsClient\x12(\n\rservice_class\x18\x03 \x01(\t:\x11\x44ownpourPsService\x12\x1c\n\x11start_server_port\x18\x04 \x01(\r:\x01\x30\x12\x1d\n\x11server_thread_num\x18\x05 \x01(\r:\x02\x31\x32\"\xc0\x02\n\x0eTableParameter\x12\x10\n\x08table_id\x18\x01 \x01(\x04\x12\x13\n\x0btable_class\x18\x02 \x01(\t\x12\x17\n\tshard_num\x18\x03 \x01(\x04:\x04\x31\x30\x30\x30\x12\x30\n\x08\x61\x63\x63\x65ssor\x18\x04 
\x01(\x0b\x32\x1e.paddle.TableAccessorParameter\x12\x1f\n\x04type\x18\x05 \x01(\x0e\x32\x11.paddle.TableType\x12\x1f\n\x10\x63ompress_in_save\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\'\n\x19\x65nable_sparse_table_cache\x18\x07 \x01(\x08:\x04true\x12(\n\x17sparse_table_cache_rate\x18\x08 \x01(\x01:\x07\x30.00055\x12\'\n\x1bsparse_table_cache_file_num\x18\t \x01(\r:\x02\x31\x36\"\xc1\x04\n\x16TableAccessorParameter\x12\x16\n\x0e\x61\x63\x63\x65ssor_class\x18\x01 \x01(\t\x12\x38\n\x10sparse_sgd_param\x18\x02 \x01(\x0b\x32\x1e.paddle.SparseSGDRuleParameter\x12\x36\n\x0f\x64\x65nse_sgd_param\x18\x03 \x01(\x0b\x32\x1d.paddle.DenseSGDRuleParameter\x12\x13\n\x07\x66\x65\x61_dim\x18\x04 \x01(\r:\x02\x31\x31\x12\x15\n\nembedx_dim\x18\x05 \x01(\r:\x01\x38\x12\x1c\n\x10\x65mbedx_threshold\x18\x06 \x01(\r:\x02\x31\x30\x12G\n\x17\x64ownpour_accessor_param\x18\x07 \x01(\x0b\x32&.paddle.DownpourTableAccessorParameter\x12\x45\n\x19table_accessor_save_param\x18\x08 \x03(\x0b\x32\".paddle.TableAccessorSaveParameter\x12\x44\n\x16sparse_commonsgd_param\x18\t \x01(\x0b\x32$.paddle.SparseCommonSGDRuleParameter\x12=\n\x0f\x65mbed_sgd_param\x18\n \x01(\x0b\x32$.paddle.SparseCommonSGDRuleParameter\x12>\n\x10\x65mbedx_sgd_param\x18\x0b \x01(\x0b\x32$.paddle.SparseCommonSGDRuleParameter\"\xba\x02\n\x1e\x44ownpourTableAccessorParameter\x12\x19\n\x0cnonclk_coeff\x18\x01 \x01(\x02:\x03\x30.1\x12\x16\n\x0b\x63lick_coeff\x18\x02 \x01(\x02:\x01\x31\x12\x1b\n\x0e\x62\x61se_threshold\x18\x03 \x01(\x02:\x03\x31.5\x12\x1d\n\x0f\x64\x65lta_threshold\x18\x04 \x01(\x02:\x04\x30.25\x12\x1b\n\x0f\x64\x65lta_keep_days\x18\x05 \x01(\x02:\x02\x31\x36\x12#\n\x15show_click_decay_rate\x18\x06 \x01(\x02:\x04\x30.98\x12\x1d\n\x10\x64\x65lete_threshold\x18\x07 \x01(\x02:\x03\x30.8\x12$\n\x18\x64\x65lete_after_unseen_days\x18\x08 \x01(\x02:\x02\x33\x30\x12\"\n\x17ssd_unseenday_threshold\x18\t \x01(\x05:\x01\x31\"S\n\x1aTableAccessorSaveParameter\x12\r\n\x05param\x18\x01 \x01(\r\x12\x11\n\tconverter\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65\x63onverter\x18\x03 \x01(\t\"e\n\x10PsRequestMessage\x12\x0e\n\x06\x63md_id\x18\x01 \x02(\r\x12\x10\n\x08table_id\x18\x02 \x01(\r\x12\x0e\n\x06params\x18\x03 \x03(\x0c\x12\x11\n\tclient_id\x18\x04 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\"\x85\x01\n\x16SparseSGDRuleParameter\x12\x1b\n\rlearning_rate\x18\x01 \x01(\x01:\x04\x30.05\x12\x18\n\rinitial_g2sum\x18\x02 \x01(\x01:\x01\x33\x12\x1d\n\rinitial_range\x18\x03 \x01(\x01:\x06\x30.0001\x12\x15\n\rweight_bounds\x18\x04 \x03(\x02\"\xc6\x01\n\x1cSparseCommonSGDRuleParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x32\n\x05naive\x18\x02 \x01(\x0b\x32#.paddle.SparseNaiveSGDRuleParameter\x12\x36\n\x07\x61\x64\x61grad\x18\x03 \x01(\x0b\x32%.paddle.SparseAdagradSGDRuleParameter\x12,\n\x04\x61\x64\x61m\x18\x04 \x01(\x0b\x32\x1e.paddle.SparseAdamSGDParameter\"p\n\x1bSparseNaiveSGDRuleParameter\x12\x1b\n\rlearning_rate\x18\x01 \x01(\x01:\x04\x30.05\x12\x1d\n\rinitial_range\x18\x02 \x01(\x01:\x06\x30.0001\x12\x15\n\rweight_bounds\x18\x03 \x03(\x02\"\x8c\x01\n\x1dSparseAdagradSGDRuleParameter\x12\x1b\n\rlearning_rate\x18\x01 \x01(\x01:\x04\x30.05\x12\x18\n\rinitial_g2sum\x18\x02 \x01(\x01:\x01\x33\x12\x1d\n\rinitial_range\x18\x03 \x01(\x01:\x06\x30.0001\x12\x15\n\rweight_bounds\x18\x04 \x03(\x02\"\xc8\x01\n\x16SparseAdamSGDParameter\x12\x1c\n\rlearning_rate\x18\x01 \x01(\x01:\x05\x30.001\x12\x1d\n\rinitial_range\x18\x02 \x01(\x01:\x06\x30.0001\x12\x1d\n\x10\x62\x65ta1_decay_rate\x18\x03 \x01(\x01:\x03\x30.9\x12\x1f\n\x10\x62\x65ta2_decay_rate\x18\x04 
\x01(\x01:\x05\x30.999\x12\x1a\n\x0b\x61\x64\x61_epsilon\x18\x05 \x01(\x01:\x05\x31\x65-08\x12\x15\n\rweight_bounds\x18\x06 \x03(\x02\"\xe1\x01\n\x15\x44\x65nseSGDRuleParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\x04\x61\x64\x61m\x18\x02 \x01(\x0b\x32\x18.paddle.AdamSGDParameter\x12(\n\x05naive\x18\x03 \x01(\x0b\x32\x19.paddle.NaiveSGDParameter\x12,\n\x07summary\x18\x04 \x01(\x0b\x32\x1b.paddle.SummarySGDParameter\x12:\n\x0emoving_average\x18\x05 \x01(\x0b\x32\".paddle.MovingAverageRuleParameter\"\xac\x01\n\x10\x41\x64\x61mSGDParameter\x12\x1c\n\rlearning_rate\x18\x01 \x01(\x01:\x05\x35\x65-06\x12 \n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01:\x08\x30.999993\x12\x1e\n\x0e\x61\x64\x61_decay_rate\x18\x03 \x01(\x01:\x06\x30.9999\x12\x1a\n\x0b\x61\x64\x61_epsilon\x18\x04 \x01(\x01:\x05\x31\x65-08\x12\x1c\n\x0emom_decay_rate\x18\x05 \x01(\x01:\x04\x30.99\"J\n\x11NaiveSGDParameter\x12\x1d\n\rlearning_rate\x18\x01 \x01(\x01:\x06\x30.0002\x12\x16\n\x0e\x61vg_decay_rate\x18\x02 \x01(\x01\";\n\x13SummarySGDParameter\x12$\n\x12summary_decay_rate\x18\x01 \x01(\x01:\x08\x30.999999\".\n\x1aMovingAverageRuleParameter\x12\x10\n\x08momentum\x18\x01 \x01(\x01\"I\n\x11PsResponseMessage\x12\x13\n\x08\x65rr_code\x18\x01 \x02(\x05:\x01\x30\x12\x11\n\x07\x65rr_msg\x18\x02 \x02(\t:\x00\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\"\xd5\x01\n\x11\x46sClientParameter\x12:\n\x07\x66s_type\x18\x01 \x01(\x0e\x32#.paddle.FsClientParameter.FsApiType:\x04HDFS\x12\x0b\n\x03uri\x18\x02 \x01(\t\x12\x0c\n\x04user\x18\x03 \x01(\t\x12\x0e\n\x06passwd\x18\x04 \x01(\t\x12\x13\n\x0b\x62uffer_size\x18\x05 \x01(\x05\x12\x12\n\nhadoop_bin\x18\x33 \x01(\t\x12\x10\n\x08\x61\x66s_conf\x18\x65 \x01(\t\"\x1e\n\tFsApiType\x12\x08\n\x04HDFS\x10\x00\x12\x07\n\x03\x41\x46S\x10\x01*4\n\tTableType\x12\x13\n\x0fPS_SPARSE_TABLE\x10\x00\x12\x12\n\x0ePS_DENSE_TABLE\x10\x01*\xba\x04\n\x07PsCmdID\x12\x17\n\x13PS_PULL_DENSE_TABLE\x10\x00\x12\x17\n\x13PS_PUSH_DENSE_TABLE\x10\x01\x12\x18\n\x14PS_PULL_SPARSE_TABLE\x10\x02\x12\x18\n\x14PS_PUSH_SPARSE_TABLE\x10\x03\x12\x13\n\x0fPS_SHRINK_TABLE\x10\x04\x12\x15\n\x11PS_SAVE_ONE_TABLE\x10\x05\x12\x15\n\x11PS_SAVE_ALL_TABLE\x10\x06\x12\x15\n\x11PS_LOAD_ONE_TABLE\x10\x07\x12\x15\n\x11PS_LOAD_ALL_TABLE\x10\x08\x12\x16\n\x12PS_CLEAR_ONE_TABLE\x10\t\x12\x16\n\x12PS_CLEAR_ALL_TABLE\x10\n\x12\x17\n\x13PS_PUSH_DENSE_PARAM\x10\x0b\x12\x12\n\x0ePS_STOP_SERVER\x10\x0c\x12\x1b\n\x17PS_SAVE_ONE_CACHE_TABLE\x10\r\x12\x1a\n\x16PS_GET_CACHE_THRESHOLD\x10\x0e\x12\x14\n\x10PS_CACHE_SHUFFLE\x10\x0f\x12\x11\n\rPS_COPY_TABLE\x10\x10\x12\x1c\n\x18PS_COPY_TABLE_BY_FEASIGN\x10\x11\x12(\n$PS_PULL_SPARSE_TABLE_WITH_DEPENDENCY\x10\x12\x12(\n$PS_PUSH_SPARSE_TABLE_WITH_DEPENDENCY\x10\x13\x12\x17\n\x13PS_PRINT_TABLE_STAT\x10\x14\x12\x0e\n\nPS_S2S_MSG\x10\x65\x32K\n\tPsService\x12>\n\x07service\x12\x18.paddle.PsRequestMessage\x1a\x19.paddle.PsResponseMessageB\x06\x80\x01\x01\xf8\x01\x01' - ), -) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -_TABLETYPE = _descriptor.EnumDescriptor( - name='TableType', - full_name='paddle.TableType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='PS_SPARSE_TABLE', index=0, number=0, options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name='PS_DENSE_TABLE', index=1, number=1, options=None, type=None - ), - ], - containing_type=None, - options=None, - serialized_start=4679, - serialized_end=4731, -) -_sym_db.RegisterEnumDescriptor(_TABLETYPE) - -TableType = enum_type_wrapper.EnumTypeWrapper(_TABLETYPE) -_PSCMDID = _descriptor.EnumDescriptor( - 
name='PsCmdID', - full_name='paddle.PsCmdID', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='PS_PULL_DENSE_TABLE', - index=0, - number=0, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_DENSE_TABLE', - index=1, - number=1, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_PULL_SPARSE_TABLE', - index=2, - number=2, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_SPARSE_TABLE', - index=3, - number=3, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_SHRINK_TABLE', index=4, number=4, options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name='PS_SAVE_ONE_TABLE', index=5, number=5, options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name='PS_SAVE_ALL_TABLE', index=6, number=6, options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name='PS_LOAD_ONE_TABLE', index=7, number=7, options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name='PS_LOAD_ALL_TABLE', index=8, number=8, options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name='PS_CLEAR_ONE_TABLE', - index=9, - number=9, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_CLEAR_ALL_TABLE', - index=10, - number=10, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_DENSE_PARAM', - index=11, - number=11, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_STOP_SERVER', index=12, number=12, options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name='PS_SAVE_ONE_CACHE_TABLE', - index=13, - number=13, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_GET_CACHE_THRESHOLD', - index=14, - number=14, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_CACHE_SHUFFLE', - index=15, - number=15, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_COPY_TABLE', index=16, number=16, options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name='PS_COPY_TABLE_BY_FEASIGN', - index=17, - number=17, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_PULL_SPARSE_TABLE_WITH_DEPENDENCY', - index=18, - number=18, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_PUSH_SPARSE_TABLE_WITH_DEPENDENCY', - index=19, - number=19, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_PRINT_TABLE_STAT', - index=20, - number=20, - options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name='PS_S2S_MSG', index=21, number=101, options=None, type=None - ), - ], - containing_type=None, - options=None, - serialized_start=4734, - serialized_end=5304, -) -_sym_db.RegisterEnumDescriptor(_PSCMDID) - -PsCmdID = enum_type_wrapper.EnumTypeWrapper(_PSCMDID) -PS_SPARSE_TABLE = 0 -PS_DENSE_TABLE = 1 -PS_PULL_DENSE_TABLE = 0 -PS_PUSH_DENSE_TABLE = 1 -PS_PULL_SPARSE_TABLE = 2 -PS_PUSH_SPARSE_TABLE = 3 -PS_SHRINK_TABLE = 4 -PS_SAVE_ONE_TABLE = 5 -PS_SAVE_ALL_TABLE = 6 -PS_LOAD_ONE_TABLE = 7 -PS_LOAD_ALL_TABLE = 8 -PS_CLEAR_ONE_TABLE = 9 -PS_CLEAR_ALL_TABLE = 10 -PS_PUSH_DENSE_PARAM = 11 -PS_STOP_SERVER = 12 -PS_SAVE_ONE_CACHE_TABLE = 13 -PS_GET_CACHE_THRESHOLD = 14 -PS_CACHE_SHUFFLE = 15 -PS_COPY_TABLE = 16 -PS_COPY_TABLE_BY_FEASIGN = 17 -PS_PULL_SPARSE_TABLE_WITH_DEPENDENCY = 18 -PS_PUSH_SPARSE_TABLE_WITH_DEPENDENCY = 19 -PS_PRINT_TABLE_STAT = 20 -PS_S2S_MSG = 101 - 
-_FSCLIENTPARAMETER_FSAPITYPE = _descriptor.EnumDescriptor( - name='FsApiType', - full_name='paddle.FsClientParameter.FsApiType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='HDFS', index=0, number=0, options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name='AFS', index=1, number=1, options=None, type=None - ), - ], - containing_type=None, - options=None, - serialized_start=4647, - serialized_end=4677, -) -_sym_db.RegisterEnumDescriptor(_FSCLIENTPARAMETER_FSAPITYPE) - -_PSPARAMETER = _descriptor.Descriptor( - name='PSParameter', - full_name='paddle.PSParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='worker_class', - full_name='paddle.PSParameter.worker_class', - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='server_class', - full_name='paddle.PSParameter.server_class', - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='instance_class', - full_name='paddle.PSParameter.instance_class', - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='init_gflags', - full_name='paddle.PSParameter.init_gflags', - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=True, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='worker_param', - full_name='paddle.PSParameter.worker_param', - index=4, - number=101, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='server_param', - full_name='paddle.PSParameter.server_param', - index=5, - number=102, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='trainer_param', - full_name='paddle.PSParameter.trainer_param', - index=6, - number=301, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='fs_client_param', - full_name='paddle.PSParameter.fs_client_param', - index=7, - number=501, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - 
enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=21, - serialized_end=330, -) - -_WORKERPARAMETER = _descriptor.Descriptor( - name='WorkerParameter', - full_name='paddle.WorkerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='downpour_worker_param', - full_name='paddle.WorkerParameter.downpour_worker_param', - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=332, - serialized_end=413, -) - -_SERVERPARAMETER = _descriptor.Descriptor( - name='ServerParameter', - full_name='paddle.ServerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='downpour_server_param', - full_name='paddle.ServerParameter.downpour_server_param', - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=415, - serialized_end=496, -) - -_DOWNPOURWORKERPARAMETER = _descriptor.Descriptor( - name='DownpourWorkerParameter', - full_name='paddle.DownpourWorkerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='downpour_table_param', - full_name='paddle.DownpourWorkerParameter.downpour_table_param', - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=498, - serialized_end=577, -) - -_DOWNPOURTRAINERPARAMETER = _descriptor.Descriptor( - name='DownpourTrainerParameter', - full_name='paddle.DownpourTrainerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='dense_table', - full_name='paddle.DownpourTrainerParameter.dense_table', - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='sparse_table', - full_name='paddle.DownpourTrainerParameter.sparse_table', - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='push_sparse_per_batch', - full_name='paddle.DownpourTrainerParameter.push_sparse_per_batch', - index=2, - number=3, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - 
default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='push_dense_per_batch', - full_name='paddle.DownpourTrainerParameter.push_dense_per_batch', - index=3, - number=4, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='skip_op', - full_name='paddle.DownpourTrainerParameter.skip_op', - index=4, - number=5, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='program_config', - full_name='paddle.DownpourTrainerParameter.program_config', - index=5, - number=6, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=580, - serialized_end=833, -) - -_PROGRAMCONFIG = _descriptor.Descriptor( - name='ProgramConfig', - full_name='paddle.ProgramConfig', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='program_id', - full_name='paddle.ProgramConfig.program_id', - index=0, - number=1, - type=9, - cpp_type=9, - label=2, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='push_sparse_table_id', - full_name='paddle.ProgramConfig.push_sparse_table_id', - index=1, - number=2, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='push_dense_table_id', - full_name='paddle.ProgramConfig.push_dense_table_id', - index=2, - number=3, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='pull_sparse_table_id', - full_name='paddle.ProgramConfig.pull_sparse_table_id', - index=3, - number=4, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='pull_dense_table_id', - full_name='paddle.ProgramConfig.pull_dense_table_id', - index=4, - number=5, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=836, - serialized_end=989, -) - 
-_DENSETABLEPARAMETER = _descriptor.Descriptor( - name='DenseTableParameter', - full_name='paddle.DenseTableParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='table_id', - full_name='paddle.DenseTableParameter.table_id', - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='dense_variable_name', - full_name='paddle.DenseTableParameter.dense_variable_name', - index=1, - number=2, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='dense_gradient_variable_name', - full_name='paddle.DenseTableParameter.dense_gradient_variable_name', - index=2, - number=3, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='fea_dim', - full_name='paddle.DenseTableParameter.fea_dim', - index=3, - number=4, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=991, - serialized_end=1114, -) - -_SPARSETABLEPARAMETER = _descriptor.Descriptor( - name='SparseTableParameter', - full_name='paddle.SparseTableParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='table_id', - full_name='paddle.SparseTableParameter.table_id', - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='feature_dim', - full_name='paddle.SparseTableParameter.feature_dim', - index=1, - number=2, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='slot_key', - full_name='paddle.SparseTableParameter.slot_key', - index=2, - number=3, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='slot_value', - full_name='paddle.SparseTableParameter.slot_value', - index=3, - number=4, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='slot_gradient', - full_name='paddle.SparseTableParameter.slot_gradient', - index=4, - number=5, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - 
default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=1116, - serialized_end=1238, -) - -_DOWNPOURSERVERPARAMETER = _descriptor.Descriptor( - name='DownpourServerParameter', - full_name='paddle.DownpourServerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='downpour_table_param', - full_name='paddle.DownpourServerParameter.downpour_table_param', - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='service_param', - full_name='paddle.DownpourServerParameter.service_param', - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=1241, - serialized_end=1375, -) - -_SERVERSERVICEPARAMETER = _descriptor.Descriptor( - name='ServerServiceParameter', - full_name='paddle.ServerServiceParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='server_class', - full_name='paddle.ServerServiceParameter.server_class', - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=True, - default_value=_b("DownpourBrpcPsServer").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='client_class', - full_name='paddle.ServerServiceParameter.client_class', - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=True, - default_value=_b("DownpourBrpcPsClient").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='service_class', - full_name='paddle.ServerServiceParameter.service_class', - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=True, - default_value=_b("DownpourPsService").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='start_server_port', - full_name='paddle.ServerServiceParameter.start_server_port', - index=3, - number=4, - type=13, - cpp_type=3, - label=1, - has_default_value=True, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='server_thread_num', - full_name='paddle.ServerServiceParameter.server_thread_num', - index=4, - number=5, - type=13, - cpp_type=3, - label=1, - has_default_value=True, - default_value=12, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - 
], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=1378, - serialized_end=1593, -) - -_TABLEPARAMETER = _descriptor.Descriptor( - name='TableParameter', - full_name='paddle.TableParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='table_id', - full_name='paddle.TableParameter.table_id', - index=0, - number=1, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='table_class', - full_name='paddle.TableParameter.table_class', - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='shard_num', - full_name='paddle.TableParameter.shard_num', - index=2, - number=3, - type=4, - cpp_type=4, - label=1, - has_default_value=True, - default_value=1000, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='accessor', - full_name='paddle.TableParameter.accessor', - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='type', - full_name='paddle.TableParameter.type', - index=4, - number=5, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='compress_in_save', - full_name='paddle.TableParameter.compress_in_save', - index=5, - number=6, - type=8, - cpp_type=7, - label=1, - has_default_value=True, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='enable_sparse_table_cache', - full_name='paddle.TableParameter.enable_sparse_table_cache', - index=6, - number=7, - type=8, - cpp_type=7, - label=1, - has_default_value=True, - default_value=True, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='sparse_table_cache_rate', - full_name='paddle.TableParameter.sparse_table_cache_rate', - index=7, - number=8, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.00055), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='sparse_table_cache_file_num', - full_name='paddle.TableParameter.sparse_table_cache_file_num', - index=8, - number=9, - type=13, - cpp_type=3, - label=1, - has_default_value=True, - default_value=16, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], 
- enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=1596, - serialized_end=1916, -) - -_TABLEACCESSORPARAMETER = _descriptor.Descriptor( - name='TableAccessorParameter', - full_name='paddle.TableAccessorParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='accessor_class', - full_name='paddle.TableAccessorParameter.accessor_class', - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='sparse_sgd_param', - full_name='paddle.TableAccessorParameter.sparse_sgd_param', - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='dense_sgd_param', - full_name='paddle.TableAccessorParameter.dense_sgd_param', - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='fea_dim', - full_name='paddle.TableAccessorParameter.fea_dim', - index=3, - number=4, - type=13, - cpp_type=3, - label=1, - has_default_value=True, - default_value=11, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='embedx_dim', - full_name='paddle.TableAccessorParameter.embedx_dim', - index=4, - number=5, - type=13, - cpp_type=3, - label=1, - has_default_value=True, - default_value=8, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='embedx_threshold', - full_name='paddle.TableAccessorParameter.embedx_threshold', - index=5, - number=6, - type=13, - cpp_type=3, - label=1, - has_default_value=True, - default_value=10, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='downpour_accessor_param', - full_name='paddle.TableAccessorParameter.downpour_accessor_param', - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='table_accessor_save_param', - full_name='paddle.TableAccessorParameter.table_accessor_save_param', - index=7, - number=8, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='sparse_commonsgd_param', - full_name='paddle.TableAccessorParameter.sparse_commonsgd_param', - index=8, - number=9, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - 
is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='embed_sgd_param', - full_name='paddle.TableAccessorParameter.embed_sgd_param', - index=9, - number=10, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='embedx_sgd_param', - full_name='paddle.TableAccessorParameter.embedx_sgd_param', - index=10, - number=11, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=1919, - serialized_end=2496, -) - -_DOWNPOURTABLEACCESSORPARAMETER = _descriptor.Descriptor( - name='DownpourTableAccessorParameter', - full_name='paddle.DownpourTableAccessorParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='nonclk_coeff', - full_name='paddle.DownpourTableAccessorParameter.nonclk_coeff', - index=0, - number=1, - type=2, - cpp_type=6, - label=1, - has_default_value=True, - default_value=float(0.1), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='click_coeff', - full_name='paddle.DownpourTableAccessorParameter.click_coeff', - index=1, - number=2, - type=2, - cpp_type=6, - label=1, - has_default_value=True, - default_value=float(1), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='base_threshold', - full_name='paddle.DownpourTableAccessorParameter.base_threshold', - index=2, - number=3, - type=2, - cpp_type=6, - label=1, - has_default_value=True, - default_value=float(1.5), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='delta_threshold', - full_name='paddle.DownpourTableAccessorParameter.delta_threshold', - index=3, - number=4, - type=2, - cpp_type=6, - label=1, - has_default_value=True, - default_value=float(0.25), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='delta_keep_days', - full_name='paddle.DownpourTableAccessorParameter.delta_keep_days', - index=4, - number=5, - type=2, - cpp_type=6, - label=1, - has_default_value=True, - default_value=float(16), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='show_click_decay_rate', - full_name='paddle.DownpourTableAccessorParameter.show_click_decay_rate', - index=5, - number=6, - type=2, - cpp_type=6, - label=1, - has_default_value=True, - default_value=float(0.98), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='delete_threshold', - full_name='paddle.DownpourTableAccessorParameter.delete_threshold', - index=6, 
- number=7, - type=2, - cpp_type=6, - label=1, - has_default_value=True, - default_value=float(0.8), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='delete_after_unseen_days', - full_name='paddle.DownpourTableAccessorParameter.delete_after_unseen_days', - index=7, - number=8, - type=2, - cpp_type=6, - label=1, - has_default_value=True, - default_value=float(30), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='ssd_unseenday_threshold', - full_name='paddle.DownpourTableAccessorParameter.ssd_unseenday_threshold', - index=8, - number=9, - type=5, - cpp_type=1, - label=1, - has_default_value=True, - default_value=1, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=2499, - serialized_end=2813, -) - -_TABLEACCESSORSAVEPARAMETER = _descriptor.Descriptor( - name='TableAccessorSaveParameter', - full_name='paddle.TableAccessorSaveParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='param', - full_name='paddle.TableAccessorSaveParameter.param', - index=0, - number=1, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='converter', - full_name='paddle.TableAccessorSaveParameter.converter', - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='deconverter', - full_name='paddle.TableAccessorSaveParameter.deconverter', - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=2815, - serialized_end=2898, -) - -_PSREQUESTMESSAGE = _descriptor.Descriptor( - name='PsRequestMessage', - full_name='paddle.PsRequestMessage', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='cmd_id', - full_name='paddle.PsRequestMessage.cmd_id', - index=0, - number=1, - type=13, - cpp_type=3, - label=2, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='table_id', - full_name='paddle.PsRequestMessage.table_id', - index=1, - number=2, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - 
_descriptor.FieldDescriptor( - name='params', - full_name='paddle.PsRequestMessage.params', - index=2, - number=3, - type=12, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='client_id', - full_name='paddle.PsRequestMessage.client_id', - index=3, - number=4, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='data', - full_name='paddle.PsRequestMessage.data', - index=4, - number=5, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b(""), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=2900, - serialized_end=3001, -) - -_SPARSESGDRULEPARAMETER = _descriptor.Descriptor( - name='SparseSGDRuleParameter', - full_name='paddle.SparseSGDRuleParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', - full_name='paddle.SparseSGDRuleParameter.learning_rate', - index=0, - number=1, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.05), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='initial_g2sum', - full_name='paddle.SparseSGDRuleParameter.initial_g2sum', - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(3), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='initial_range', - full_name='paddle.SparseSGDRuleParameter.initial_range', - index=2, - number=3, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.0001), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='weight_bounds', - full_name='paddle.SparseSGDRuleParameter.weight_bounds', - index=3, - number=4, - type=2, - cpp_type=6, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=3004, - serialized_end=3137, -) - -_SPARSECOMMONSGDRULEPARAMETER = _descriptor.Descriptor( - name='SparseCommonSGDRuleParameter', - full_name='paddle.SparseCommonSGDRuleParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', - full_name='paddle.SparseCommonSGDRuleParameter.name', - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - 
extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='naive', - full_name='paddle.SparseCommonSGDRuleParameter.naive', - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='adagrad', - full_name='paddle.SparseCommonSGDRuleParameter.adagrad', - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='adam', - full_name='paddle.SparseCommonSGDRuleParameter.adam', - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=3140, - serialized_end=3338, -) - -_SPARSENAIVESGDRULEPARAMETER = _descriptor.Descriptor( - name='SparseNaiveSGDRuleParameter', - full_name='paddle.SparseNaiveSGDRuleParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', - full_name='paddle.SparseNaiveSGDRuleParameter.learning_rate', - index=0, - number=1, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.05), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='initial_range', - full_name='paddle.SparseNaiveSGDRuleParameter.initial_range', - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.0001), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='weight_bounds', - full_name='paddle.SparseNaiveSGDRuleParameter.weight_bounds', - index=2, - number=3, - type=2, - cpp_type=6, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=3340, - serialized_end=3452, -) - -_SPARSEADAGRADSGDRULEPARAMETER = _descriptor.Descriptor( - name='SparseAdagradSGDRuleParameter', - full_name='paddle.SparseAdagradSGDRuleParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', - full_name='paddle.SparseAdagradSGDRuleParameter.learning_rate', - index=0, - number=1, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.05), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='initial_g2sum', - full_name='paddle.SparseAdagradSGDRuleParameter.initial_g2sum', - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - 
has_default_value=True, - default_value=float(3), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='initial_range', - full_name='paddle.SparseAdagradSGDRuleParameter.initial_range', - index=2, - number=3, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.0001), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='weight_bounds', - full_name='paddle.SparseAdagradSGDRuleParameter.weight_bounds', - index=3, - number=4, - type=2, - cpp_type=6, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=3455, - serialized_end=3595, -) - -_SPARSEADAMSGDPARAMETER = _descriptor.Descriptor( - name='SparseAdamSGDParameter', - full_name='paddle.SparseAdamSGDParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', - full_name='paddle.SparseAdamSGDParameter.learning_rate', - index=0, - number=1, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.001), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='initial_range', - full_name='paddle.SparseAdamSGDParameter.initial_range', - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.0001), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='beta1_decay_rate', - full_name='paddle.SparseAdamSGDParameter.beta1_decay_rate', - index=2, - number=3, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.9), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='beta2_decay_rate', - full_name='paddle.SparseAdamSGDParameter.beta2_decay_rate', - index=3, - number=4, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.999), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='ada_epsilon', - full_name='paddle.SparseAdamSGDParameter.ada_epsilon', - index=4, - number=5, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(1e-08), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='weight_bounds', - full_name='paddle.SparseAdamSGDParameter.weight_bounds', - index=5, - number=6, - type=2, - cpp_type=6, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - 
syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=3598, - serialized_end=3798, -) - -_DENSESGDRULEPARAMETER = _descriptor.Descriptor( - name='DenseSGDRuleParameter', - full_name='paddle.DenseSGDRuleParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', - full_name='paddle.DenseSGDRuleParameter.name', - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='adam', - full_name='paddle.DenseSGDRuleParameter.adam', - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='naive', - full_name='paddle.DenseSGDRuleParameter.naive', - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='summary', - full_name='paddle.DenseSGDRuleParameter.summary', - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='moving_average', - full_name='paddle.DenseSGDRuleParameter.moving_average', - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=3801, - serialized_end=4026, -) - -_ADAMSGDPARAMETER = _descriptor.Descriptor( - name='AdamSGDParameter', - full_name='paddle.AdamSGDParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', - full_name='paddle.AdamSGDParameter.learning_rate', - index=0, - number=1, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(5e-06), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='avg_decay_rate', - full_name='paddle.AdamSGDParameter.avg_decay_rate', - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.999993), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='ada_decay_rate', - full_name='paddle.AdamSGDParameter.ada_decay_rate', - index=2, - number=3, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.9999), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='ada_epsilon', - 
full_name='paddle.AdamSGDParameter.ada_epsilon', - index=3, - number=4, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(1e-08), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='mom_decay_rate', - full_name='paddle.AdamSGDParameter.mom_decay_rate', - index=4, - number=5, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.99), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=4029, - serialized_end=4201, -) - -_NAIVESGDPARAMETER = _descriptor.Descriptor( - name='NaiveSGDParameter', - full_name='paddle.NaiveSGDParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='learning_rate', - full_name='paddle.NaiveSGDParameter.learning_rate', - index=0, - number=1, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.0002), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='avg_decay_rate', - full_name='paddle.NaiveSGDParameter.avg_decay_rate', - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=4203, - serialized_end=4277, -) - -_SUMMARYSGDPARAMETER = _descriptor.Descriptor( - name='SummarySGDParameter', - full_name='paddle.SummarySGDParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='summary_decay_rate', - full_name='paddle.SummarySGDParameter.summary_decay_rate', - index=0, - number=1, - type=1, - cpp_type=5, - label=1, - has_default_value=True, - default_value=float(0.999999), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=4279, - serialized_end=4338, -) - -_MOVINGAVERAGERULEPARAMETER = _descriptor.Descriptor( - name='MovingAverageRuleParameter', - full_name='paddle.MovingAverageRuleParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='momentum', - full_name='paddle.MovingAverageRuleParameter.momentum', - index=0, - number=1, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=4340, - serialized_end=4386, -) - -_PSRESPONSEMESSAGE = _descriptor.Descriptor( - name='PsResponseMessage', - 
full_name='paddle.PsResponseMessage', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='err_code', - full_name='paddle.PsResponseMessage.err_code', - index=0, - number=1, - type=5, - cpp_type=1, - label=2, - has_default_value=True, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='err_msg', - full_name='paddle.PsResponseMessage.err_msg', - index=1, - number=2, - type=9, - cpp_type=9, - label=2, - has_default_value=True, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='data', - full_name='paddle.PsResponseMessage.data', - index=2, - number=3, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b(""), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=4388, - serialized_end=4461, -) - -_FSCLIENTPARAMETER = _descriptor.Descriptor( - name='FsClientParameter', - full_name='paddle.FsClientParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='fs_type', - full_name='paddle.FsClientParameter.fs_type', - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=True, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='uri', - full_name='paddle.FsClientParameter.uri', - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='user', - full_name='paddle.FsClientParameter.user', - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='passwd', - full_name='paddle.FsClientParameter.passwd', - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='buffer_size', - full_name='paddle.FsClientParameter.buffer_size', - index=4, - number=5, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='hadoop_bin', - full_name='paddle.FsClientParameter.hadoop_bin', - index=5, - number=51, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - 
extension_scope=None, - options=None, - ), - _descriptor.FieldDescriptor( - name='afs_conf', - full_name='paddle.FsClientParameter.afs_conf', - index=6, - number=101, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode('utf-8'), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - options=None, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _FSCLIENTPARAMETER_FSAPITYPE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[], - serialized_start=4464, - serialized_end=4677, -) - -_PSPARAMETER.fields_by_name['worker_param'].message_type = _WORKERPARAMETER -_PSPARAMETER.fields_by_name['server_param'].message_type = _SERVERPARAMETER -_PSPARAMETER.fields_by_name[ - 'trainer_param' -].message_type = _DOWNPOURTRAINERPARAMETER -_PSPARAMETER.fields_by_name['fs_client_param'].message_type = _FSCLIENTPARAMETER -_WORKERPARAMETER.fields_by_name[ - 'downpour_worker_param' -].message_type = _DOWNPOURWORKERPARAMETER -_SERVERPARAMETER.fields_by_name[ - 'downpour_server_param' -].message_type = _DOWNPOURSERVERPARAMETER -_DOWNPOURWORKERPARAMETER.fields_by_name[ - 'downpour_table_param' -].message_type = _TABLEPARAMETER -_DOWNPOURTRAINERPARAMETER.fields_by_name[ - 'dense_table' -].message_type = _DENSETABLEPARAMETER -_DOWNPOURTRAINERPARAMETER.fields_by_name[ - 'sparse_table' -].message_type = _SPARSETABLEPARAMETER -_DOWNPOURTRAINERPARAMETER.fields_by_name[ - 'program_config' -].message_type = _PROGRAMCONFIG -_DOWNPOURSERVERPARAMETER.fields_by_name[ - 'downpour_table_param' -].message_type = _TABLEPARAMETER -_DOWNPOURSERVERPARAMETER.fields_by_name[ - 'service_param' -].message_type = _SERVERSERVICEPARAMETER -_TABLEPARAMETER.fields_by_name[ - 'accessor' -].message_type = _TABLEACCESSORPARAMETER -_TABLEPARAMETER.fields_by_name['type'].enum_type = _TABLETYPE -_TABLEACCESSORPARAMETER.fields_by_name[ - 'sparse_sgd_param' -].message_type = _SPARSESGDRULEPARAMETER -_TABLEACCESSORPARAMETER.fields_by_name[ - 'dense_sgd_param' -].message_type = _DENSESGDRULEPARAMETER -_TABLEACCESSORPARAMETER.fields_by_name[ - 'downpour_accessor_param' -].message_type = _DOWNPOURTABLEACCESSORPARAMETER -_TABLEACCESSORPARAMETER.fields_by_name[ - 'table_accessor_save_param' -].message_type = _TABLEACCESSORSAVEPARAMETER -_TABLEACCESSORPARAMETER.fields_by_name[ - 'sparse_commonsgd_param' -].message_type = _SPARSECOMMONSGDRULEPARAMETER -_TABLEACCESSORPARAMETER.fields_by_name[ - 'embed_sgd_param' -].message_type = _SPARSECOMMONSGDRULEPARAMETER -_TABLEACCESSORPARAMETER.fields_by_name[ - 'embedx_sgd_param' -].message_type = _SPARSECOMMONSGDRULEPARAMETER -_SPARSECOMMONSGDRULEPARAMETER.fields_by_name[ - 'naive' -].message_type = _SPARSENAIVESGDRULEPARAMETER -_SPARSECOMMONSGDRULEPARAMETER.fields_by_name[ - 'adagrad' -].message_type = _SPARSEADAGRADSGDRULEPARAMETER -_SPARSECOMMONSGDRULEPARAMETER.fields_by_name[ - 'adam' -].message_type = _SPARSEADAMSGDPARAMETER -_DENSESGDRULEPARAMETER.fields_by_name['adam'].message_type = _ADAMSGDPARAMETER -_DENSESGDRULEPARAMETER.fields_by_name['naive'].message_type = _NAIVESGDPARAMETER -_DENSESGDRULEPARAMETER.fields_by_name[ - 'summary' -].message_type = _SUMMARYSGDPARAMETER -_DENSESGDRULEPARAMETER.fields_by_name[ - 'moving_average' -].message_type = _MOVINGAVERAGERULEPARAMETER -_FSCLIENTPARAMETER.fields_by_name[ - 'fs_type' -].enum_type = _FSCLIENTPARAMETER_FSAPITYPE -_FSCLIENTPARAMETER_FSAPITYPE.containing_type = _FSCLIENTPARAMETER 
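The deleted block above wires the nested message types of the pslib configuration schema (PSParameter -> worker/server/trainer/fs_client parameters, plus the sparse/dense SGD-rule hierarchy), and the block that follows registers the same messages with the descriptor pool. As a hedged illustration only, this is how a ps_pb2 module generated from ps.proto with this schema is typically exercised; the import path and the concrete field values are assumptions and not part of this patch, while the field names and defaults are taken from the descriptors shown here.

    # Sketch under the assumptions stated above; ps_pb2 is the generated module
    # (the descriptors above declare __module__='ps_pb2').
    from google.protobuf import text_format

    import ps_pb2  # import path depends on where the generated file is installed

    conf = ps_pb2.PSParameter()
    conf.fs_client_param.uri = "hdfs://example:9000"  # FsClientParameter.uri (hypothetical value)
    conf.fs_client_param.user = "demo_user"           # FsClientParameter.user (hypothetical value)

    rule = ps_pb2.SparseSGDRuleParameter()
    print(rule.learning_rate)                 # 0.05, the proto2 default recorded in the descriptor
    rule.weight_bounds.extend([-10.0, 10.0])  # repeated float field (label=3 in the descriptor)

    # Standard protobuf round-trip: text dump plus binary serialize/parse.
    print(text_format.MessageToString(conf))
    restored = ps_pb2.PSParameter()
    restored.ParseFromString(conf.SerializeToString())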
-DESCRIPTOR.message_types_by_name['PSParameter'] = _PSPARAMETER -DESCRIPTOR.message_types_by_name['WorkerParameter'] = _WORKERPARAMETER -DESCRIPTOR.message_types_by_name['ServerParameter'] = _SERVERPARAMETER -DESCRIPTOR.message_types_by_name[ - 'DownpourWorkerParameter' -] = _DOWNPOURWORKERPARAMETER -DESCRIPTOR.message_types_by_name[ - 'DownpourTrainerParameter' -] = _DOWNPOURTRAINERPARAMETER -DESCRIPTOR.message_types_by_name['ProgramConfig'] = _PROGRAMCONFIG -DESCRIPTOR.message_types_by_name['DenseTableParameter'] = _DENSETABLEPARAMETER -DESCRIPTOR.message_types_by_name['SparseTableParameter'] = _SPARSETABLEPARAMETER -DESCRIPTOR.message_types_by_name[ - 'DownpourServerParameter' -] = _DOWNPOURSERVERPARAMETER -DESCRIPTOR.message_types_by_name[ - 'ServerServiceParameter' -] = _SERVERSERVICEPARAMETER -DESCRIPTOR.message_types_by_name['TableParameter'] = _TABLEPARAMETER -DESCRIPTOR.message_types_by_name[ - 'TableAccessorParameter' -] = _TABLEACCESSORPARAMETER -DESCRIPTOR.message_types_by_name[ - 'DownpourTableAccessorParameter' -] = _DOWNPOURTABLEACCESSORPARAMETER -DESCRIPTOR.message_types_by_name[ - 'TableAccessorSaveParameter' -] = _TABLEACCESSORSAVEPARAMETER -DESCRIPTOR.message_types_by_name['PsRequestMessage'] = _PSREQUESTMESSAGE -DESCRIPTOR.message_types_by_name[ - 'SparseSGDRuleParameter' -] = _SPARSESGDRULEPARAMETER -DESCRIPTOR.message_types_by_name[ - 'SparseCommonSGDRuleParameter' -] = _SPARSECOMMONSGDRULEPARAMETER -DESCRIPTOR.message_types_by_name[ - 'SparseNaiveSGDRuleParameter' -] = _SPARSENAIVESGDRULEPARAMETER -DESCRIPTOR.message_types_by_name[ - 'SparseAdagradSGDRuleParameter' -] = _SPARSEADAGRADSGDRULEPARAMETER -DESCRIPTOR.message_types_by_name[ - 'SparseAdamSGDParameter' -] = _SPARSEADAMSGDPARAMETER -DESCRIPTOR.message_types_by_name[ - 'DenseSGDRuleParameter' -] = _DENSESGDRULEPARAMETER -DESCRIPTOR.message_types_by_name['AdamSGDParameter'] = _ADAMSGDPARAMETER -DESCRIPTOR.message_types_by_name['NaiveSGDParameter'] = _NAIVESGDPARAMETER -DESCRIPTOR.message_types_by_name['SummarySGDParameter'] = _SUMMARYSGDPARAMETER -DESCRIPTOR.message_types_by_name[ - 'MovingAverageRuleParameter' -] = _MOVINGAVERAGERULEPARAMETER -DESCRIPTOR.message_types_by_name['PsResponseMessage'] = _PSRESPONSEMESSAGE -DESCRIPTOR.message_types_by_name['FsClientParameter'] = _FSCLIENTPARAMETER -DESCRIPTOR.enum_types_by_name['TableType'] = _TABLETYPE -DESCRIPTOR.enum_types_by_name['PsCmdID'] = _PSCMDID - -PSParameter = _reflection.GeneratedProtocolMessageType( - 'PSParameter', - (_message.Message,), - dict( - DESCRIPTOR=_PSPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PSParameter) - ), -) -_sym_db.RegisterMessage(PSParameter) - -WorkerParameter = _reflection.GeneratedProtocolMessageType( - 'WorkerParameter', - (_message.Message,), - dict( - DESCRIPTOR=_WORKERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.WorkerParameter) - ), -) -_sym_db.RegisterMessage(WorkerParameter) - -ServerParameter = _reflection.GeneratedProtocolMessageType( - 'ServerParameter', - (_message.Message,), - dict( - DESCRIPTOR=_SERVERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.ServerParameter) - ), -) -_sym_db.RegisterMessage(ServerParameter) - -DownpourWorkerParameter = _reflection.GeneratedProtocolMessageType( - 'DownpourWorkerParameter', - (_message.Message,), - dict( - DESCRIPTOR=_DOWNPOURWORKERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourWorkerParameter) - ), -) 
-_sym_db.RegisterMessage(DownpourWorkerParameter) - -DownpourTrainerParameter = _reflection.GeneratedProtocolMessageType( - 'DownpourTrainerParameter', - (_message.Message,), - dict( - DESCRIPTOR=_DOWNPOURTRAINERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourTrainerParameter) - ), -) -_sym_db.RegisterMessage(DownpourTrainerParameter) - -ProgramConfig = _reflection.GeneratedProtocolMessageType( - 'ProgramConfig', - (_message.Message,), - dict( - DESCRIPTOR=_PROGRAMCONFIG, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.ProgramConfig) - ), -) -_sym_db.RegisterMessage(ProgramConfig) - -DenseTableParameter = _reflection.GeneratedProtocolMessageType( - 'DenseTableParameter', - (_message.Message,), - dict( - DESCRIPTOR=_DENSETABLEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DenseTableParameter) - ), -) -_sym_db.RegisterMessage(DenseTableParameter) - -SparseTableParameter = _reflection.GeneratedProtocolMessageType( - 'SparseTableParameter', - (_message.Message,), - dict( - DESCRIPTOR=_SPARSETABLEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseTableParameter) - ), -) -_sym_db.RegisterMessage(SparseTableParameter) - -DownpourServerParameter = _reflection.GeneratedProtocolMessageType( - 'DownpourServerParameter', - (_message.Message,), - dict( - DESCRIPTOR=_DOWNPOURSERVERPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourServerParameter) - ), -) -_sym_db.RegisterMessage(DownpourServerParameter) - -ServerServiceParameter = _reflection.GeneratedProtocolMessageType( - 'ServerServiceParameter', - (_message.Message,), - dict( - DESCRIPTOR=_SERVERSERVICEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.ServerServiceParameter) - ), -) -_sym_db.RegisterMessage(ServerServiceParameter) - -TableParameter = _reflection.GeneratedProtocolMessageType( - 'TableParameter', - (_message.Message,), - dict( - DESCRIPTOR=_TABLEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.TableParameter) - ), -) -_sym_db.RegisterMessage(TableParameter) - -TableAccessorParameter = _reflection.GeneratedProtocolMessageType( - 'TableAccessorParameter', - (_message.Message,), - dict( - DESCRIPTOR=_TABLEACCESSORPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.TableAccessorParameter) - ), -) -_sym_db.RegisterMessage(TableAccessorParameter) - -DownpourTableAccessorParameter = _reflection.GeneratedProtocolMessageType( - 'DownpourTableAccessorParameter', - (_message.Message,), - dict( - DESCRIPTOR=_DOWNPOURTABLEACCESSORPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DownpourTableAccessorParameter) - ), -) -_sym_db.RegisterMessage(DownpourTableAccessorParameter) - -TableAccessorSaveParameter = _reflection.GeneratedProtocolMessageType( - 'TableAccessorSaveParameter', - (_message.Message,), - dict( - DESCRIPTOR=_TABLEACCESSORSAVEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.TableAccessorSaveParameter) - ), -) -_sym_db.RegisterMessage(TableAccessorSaveParameter) - -PsRequestMessage = _reflection.GeneratedProtocolMessageType( - 'PsRequestMessage', - (_message.Message,), - dict( - DESCRIPTOR=_PSREQUESTMESSAGE, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PsRequestMessage) - ), -) -_sym_db.RegisterMessage(PsRequestMessage) - -SparseSGDRuleParameter = _reflection.GeneratedProtocolMessageType( - 
'SparseSGDRuleParameter', - (_message.Message,), - dict( - DESCRIPTOR=_SPARSESGDRULEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseSGDRuleParameter) - ), -) -_sym_db.RegisterMessage(SparseSGDRuleParameter) - -SparseCommonSGDRuleParameter = _reflection.GeneratedProtocolMessageType( - 'SparseCommonSGDRuleParameter', - (_message.Message,), - dict( - DESCRIPTOR=_SPARSECOMMONSGDRULEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseCommonSGDRuleParameter) - ), -) -_sym_db.RegisterMessage(SparseCommonSGDRuleParameter) - -SparseNaiveSGDRuleParameter = _reflection.GeneratedProtocolMessageType( - 'SparseNaiveSGDRuleParameter', - (_message.Message,), - dict( - DESCRIPTOR=_SPARSENAIVESGDRULEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseNaiveSGDRuleParameter) - ), -) -_sym_db.RegisterMessage(SparseNaiveSGDRuleParameter) - -SparseAdagradSGDRuleParameter = _reflection.GeneratedProtocolMessageType( - 'SparseAdagradSGDRuleParameter', - (_message.Message,), - dict( - DESCRIPTOR=_SPARSEADAGRADSGDRULEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseAdagradSGDRuleParameter) - ), -) -_sym_db.RegisterMessage(SparseAdagradSGDRuleParameter) - -SparseAdamSGDParameter = _reflection.GeneratedProtocolMessageType( - 'SparseAdamSGDParameter', - (_message.Message,), - dict( - DESCRIPTOR=_SPARSEADAMSGDPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SparseAdamSGDParameter) - ), -) -_sym_db.RegisterMessage(SparseAdamSGDParameter) - -DenseSGDRuleParameter = _reflection.GeneratedProtocolMessageType( - 'DenseSGDRuleParameter', - (_message.Message,), - dict( - DESCRIPTOR=_DENSESGDRULEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.DenseSGDRuleParameter) - ), -) -_sym_db.RegisterMessage(DenseSGDRuleParameter) - -AdamSGDParameter = _reflection.GeneratedProtocolMessageType( - 'AdamSGDParameter', - (_message.Message,), - dict( - DESCRIPTOR=_ADAMSGDPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.AdamSGDParameter) - ), -) -_sym_db.RegisterMessage(AdamSGDParameter) - -NaiveSGDParameter = _reflection.GeneratedProtocolMessageType( - 'NaiveSGDParameter', - (_message.Message,), - dict( - DESCRIPTOR=_NAIVESGDPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.NaiveSGDParameter) - ), -) -_sym_db.RegisterMessage(NaiveSGDParameter) - -SummarySGDParameter = _reflection.GeneratedProtocolMessageType( - 'SummarySGDParameter', - (_message.Message,), - dict( - DESCRIPTOR=_SUMMARYSGDPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.SummarySGDParameter) - ), -) -_sym_db.RegisterMessage(SummarySGDParameter) - -MovingAverageRuleParameter = _reflection.GeneratedProtocolMessageType( - 'MovingAverageRuleParameter', - (_message.Message,), - dict( - DESCRIPTOR=_MOVINGAVERAGERULEPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.MovingAverageRuleParameter) - ), -) -_sym_db.RegisterMessage(MovingAverageRuleParameter) - -PsResponseMessage = _reflection.GeneratedProtocolMessageType( - 'PsResponseMessage', - (_message.Message,), - dict( - DESCRIPTOR=_PSRESPONSEMESSAGE, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.PsResponseMessage) - ), -) -_sym_db.RegisterMessage(PsResponseMessage) - -FsClientParameter = _reflection.GeneratedProtocolMessageType( - 'FsClientParameter', - (_message.Message,), - dict( 
- DESCRIPTOR=_FSCLIENTPARAMETER, - __module__='ps_pb2' - # @@protoc_insertion_point(class_scope:paddle.FsClientParameter) - ), -) -_sym_db.RegisterMessage(FsClientParameter) - -DESCRIPTOR.has_options = True -DESCRIPTOR._options = _descriptor._ParseOptions( - descriptor_pb2.FileOptions(), _b('\200\001\001\370\001\001') -) -# @@protoc_insertion_point(module_scope) diff --git a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py index e2b79936481b643fb9a3e04a0871de5f519346b0..23f5a44fe139e743d13352aea00029c355c50853 100644 --- a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py +++ b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py @@ -80,26 +80,23 @@ def model(): train_file_path, ) = ctr_dataset_reader.prepare_data() """ network definition """ - dnn_data = fluid.layers.data( + dnn_data = paddle.static.data( name="dnn_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - lr_data = fluid.layers.data( + lr_data = paddle.static.data( name="lr_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - label = fluid.layers.data( + label = paddle.static.data( name="click", shape=[-1, 1], dtype="int64", lod_level=0, - append_batch_size=False, ) datas = [dnn_data, lr_data, label] diff --git a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py index 4ec3c1d16e077ea00672c664bac3b1b4ea5e491c..e0ae707be934d9dad09faf87914ce5714fb0f20a 100644 --- a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py +++ b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py @@ -1371,8 +1371,8 @@ class FleetUtil: local_total_ins.name) # below is part of example model - label = fluid.layers.data(name="click", shape=[-1, 1],\ - dtype="int64", lod_level=0, append_batch_size=False) + label = paddle.static.data(name="click", shape=[-1, 1],\ + dtype="int64", lod_level=0) emb = my_slot_net(slots, label) # emb can be fc layer of size 1 similarity_norm = fluid.layers.sigmoid(paddle.clip(\ emb, min=-15.0, max=15.0), name="similarity_norm")\ @@ -1571,8 +1571,8 @@ class FleetUtil: local_total_ins.name) # below is part of model - label = fluid.layers.data(name="click", shape=[-1, 1],\ - dtype="int64", lod_level=0, append_batch_size=False) + label = paddle.static.data(name="click", shape=[-1, 1],\ + dtype="int64", lod_level=0) emb = my_slot_net(slots, label) # emb can be fc layer of size 1 similarity_norm = fluid.layers.sigmoid(paddle.clip(\ emb, min=-15.0, max=15.0), name="similarity_norm")\ diff --git a/python/paddle/fluid/incubate/fleet/utils/utils.py b/python/paddle/fluid/incubate/fleet/utils/utils.py index ef022c96ecf8650a8bf2b4816ef666ec42ebd006..4aff834b12eaed276bbfb6a15ec1fa3b81b02cd6 100644 --- a/python/paddle/fluid/incubate/fleet/utils/utils.py +++ b/python/paddle/fluid/incubate/fleet/utils/utils.py @@ -17,6 +17,7 @@ import sys import logging import subprocess import numpy as np +import paddle from collections import OrderedDict import paddle.fluid as fluid from paddle.fluid import core @@ -172,8 +173,9 @@ def save_var(np_array, var_name, shape_list, dtype, save_path): program = fluid.Program() place = fluid.CPUPlace() exe = fluid.Executor(place) + shape = list(shape_list) with fluid.program_guard(program): - d0_data = fluid.layers.data(var_name, shape=shape_list, dtype=dtype) + d0_data = paddle.static.data(var_name, shape=shape, dtype=dtype) append_save_op(program.global_block(), d0_data, save_path) exe.run(feed={var_name: 
np_array}, fetch_list=[]) @@ -183,7 +185,7 @@ def load_var(var_name, shape_list, dtype, save_path): place = fluid.CPUPlace() exe = fluid.Executor(place) with fluid.program_guard(program): - d0_data = fluid.layers.data(var_name, shape=shape_list, dtype=dtype) + d0_data = paddle.static.data(var_name, shape=shape_list, dtype=dtype) append_load_op(program.global_block(), d0_data, save_path) outs = exe.run(feed={}, fetch_list=[d0_data]) return outs diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py index bf1ad9b107f74694c80472f583287d617fdf0616..241dd71e200aba975b4660afbcdf078d61fad2dc 100644 --- a/python/paddle/fluid/install_check.py +++ b/python/paddle/fluid/install_check.py @@ -103,7 +103,7 @@ def run_check(): with unique_name.guard(): build_strategy = compiler.BuildStrategy() build_strategy.enable_inplace = True - inp = layers.data(name="inp", shape=[2, 2]) + inp = paddle.static.data(name="inp", shape=[-1, 2, 2]) simple_layer = SimpleLayer(input_size=2) out = simple_layer(inp) exe = executor.Executor( @@ -138,9 +138,7 @@ def run_check(): with executor.scope_guard(scope): with program_guard(train_prog, startup_prog): with unique_name.guard(): - inp0 = layers.data( - name="inp", shape=[2, 2], append_batch_size=False - ) + inp0 = paddle.static.data(name="inp", shape=[2, 2]) simple_layer0 = SimpleLayer(input_size=2) out0 = simple_layer0(inp0) param_grads = backward.append_backward( diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 490e9412cb2a76ca8c1535928a26f9227071bd37..9faeacee886c46c277d0e3240eb5263af822dd54 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -355,7 +355,7 @@ def save_vars( main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): - data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False) + data = paddle.static.data(name="img", shape=[64, 784]) w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b') hidden_w = paddle.matmul(x=data, y=w) @@ -830,7 +830,7 @@ def load_vars( main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): - data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False) + data = paddle.static.data(name="img", shape=[64, 784]) w = paddle.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') b = paddle.create_parameter(shape=[200], dtype='float32', name='fc_b') hidden_w = paddle.matmul(x=data, y=w) @@ -1598,7 +1598,7 @@ def load_inference_model( main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): - data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False) + data = paddle.static.data(name="img", shape=[-1, 64, 784]) w = paddle.create_parameter(shape=[784, 200], dtype='float32') b = paddle.create_parameter(shape=[200], dtype='float32') hidden_w = paddle.matmul(x=data, y=w) diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index e01536b2e38293147ff9cb163d29e32300c14d22..5c877c1e8f176ce50e7f54e308a49e8a89912bcd 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -466,7 +466,7 @@ class StaticRNN: is_sparse=False) # transform batch size to dim 1 x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) - boot_memory = fluid.layers.data(name='boot', 
shape=[hidden_size], dtype='float32', lod_level=1) + boot_memory = paddle.static.data(name='boot', shape=[-1, hidden_size], dtype='float32', lod_level=1) rnn = fluid.layers.StaticRNN() with rnn.step(): # mark created x_emb as input, each step process a word diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index e33cf23e6c1bc15f490b22f80ab0b32efe70d701..994fc98038086cbe581ed8e30ea183a75cc168f5 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -41,108 +41,7 @@ from ..framework import ( _set_expected_place, ) -__all__ = [ - 'data', -] - - -@static_only -def data( - name, - shape, - append_batch_size=True, - dtype='float32', - lod_level=0, - type=core.VarDesc.VarType.LOD_TENSOR, - stop_gradient=True, -): - """ - **Data Layer** - - This operator creates the global variable. The global variables can be - accessed by all the following operators in the graph. - - Note: - :code:`paddle.fluid.layers.data` is deprecated as it will be removed in - a later version. Please use :code:`paddle.fluid.data` . - - This :code:`paddle.fluid.layers.data` set shape and dtype at compile - time but does NOT check the shape or the dtype of fed data, the - :code:`paddle.fluid.data` checks the shape and the dtype of data fed - by Executor or ParallelExecutor during run time. - - To feed variable size inputs, users can feed variable size inputs - directly to this :code:`paddle.fluid.layers.data` and PaddlePaddle will - fit the size accordingly. Or set -1 on the variable dimension when using - :code:`paddle.fluid.data` . - - The default :code:`stop_gradient` attribute of the Variable created by - this API is true, which means the gradient won't be passed backward - through the data Varaible. Set :code:`var.stop_gradient = False` If - user would like to pass backward gradient. - - Args: - name(str): The name/alias of the variable, see :ref:`api_guide_Name` - for more details. - shape(list|tuple): Tuple declaring the shape. If :code:`append_batch_size` is - True and there is no -1 inside :code:`shape`, it should be - considered as the shape of the each sample. Otherwise, it should - be considered as the shape of the batched data. - append_batch_size(bool): - 1. If true, it prepends -1 to the shape. - For example if shape=[1], the resulting shape is [-1, 1]. This will - be useful to set different batch size at run time. - 2. If shape contains -1, such as shape=[1, -1]. - append_batch_size will be enforced to be be False (ineffective) - because PaddlePaddle cannot set more than 1 unknown number on the - shape. - dtype(np.dtype|VarType|str): The type of the data. Supported dtype: bool, - float16, float32, float64, int8, int16, int32, int64, uint8. - type(VarType): The output type. Supported dtype: VarType.LOD_TENSOR, - VarType.SELECTED_ROWS, VarType.NCCL_ID. Default: VarType.LOD_TENSOR. - lod_level(int): The LoD Level. 0 means the input data is not a sequence. - Default: 0. - stop_gradient(bool): A boolean that mentions whether gradient should flow. - Default: True. - - Returns: - The global variable that gives access to the data. - - Return Type: - Variable - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - data = fluid.layers.data(name='x', shape=[784], dtype='float32') - """ - helper = LayerHelper('data', **locals()) - - check_type(name, 'name', (bytes, str), 'data') - check_type(shape, 'shape', (list, tuple), 'data') - - shape = list(shape) - for i in range(len(shape)): - if shape[i] is None: - shape[i] = -1 - append_batch_size = False - elif shape[i] < 0: - append_batch_size = False - - if append_batch_size: - shape = [-1] + shape # append batch size as -1 - - data_var = helper.create_global_variable( - name=name, - shape=shape, - dtype=dtype, - type=type, - stop_gradient=stop_gradient, - lod_level=lod_level, - is_data=True, - ) - return data_var +__all__ = [] class BlockGuardServ(BlockGuard): @@ -189,11 +88,10 @@ class ListenAndServ: serv = layers.ListenAndServ( "127.0.0.1:6170", ["X"], optimizer_mode=False) with serv.do(): - x = layers.data( + x = paddle.static.data( shape=[32, 32], dtype='float32', - name="X", - append_batch_size=False) + name="X") fluid.initializer.Constant(value=1.0)(x, main.global_block()) paddle.scale(x=x, scale=10.0, out=out_var) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index fa0e4007eaa76ef172522dcda4aec1ad93948a26..fa0f49d01b99796c47f33269775ad73cc4c186eb 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -329,7 +329,7 @@ def _pull_sparse( .. code-block:: python import paddle.fluid as fluid - data = fluid.layers.data(name='sequence', shape=[1], dtype='int64', lod_level=1) + data = paddle.static.data(name='sequence', shape=[-1, 1], dtype='int64', lod_level=1) emb = fluid.layers.nn._pull_sparse( input=data, size=11, table_id=0, accessor_class="DownpourCtrAccessor") """ @@ -403,7 +403,7 @@ def _pull_sparse_v2( .. code-block:: python import paddle.fluid as fluid - data = fluid.layers.data(name='sequence', shape=[1], dtype='int64', lod_level=1) + data = paddle.static.data(name='sequence', shape=[-1, 1], dtype='int64', lod_level=1) emb = fluid.layers.nn._pull_sparse_v2( input=data, size=11, table_id=0, accessor_class="DownpourCtrAccessor") """ @@ -464,9 +464,9 @@ def _pull_gpups_sparse( import paddle.fluid as fluid slots = [] - data_1 = fluid.layers.data(name='sequence', shape=[1], dtype='int64', lod_level=1) + data_1 = paddle.static.data(name='sequence', shape=[-1,1], dtype='int64', lod_level=1) slots.append(data_1) - data_2 = fluid.layers.data(name='sequence', shape=[1], dtype='int64', lod_level=1) + data_2 = paddle.static.data(name='sequence', shape=[-1,1], dtype='int64', lod_level=1) slots.append(data_2) embs = fluid.layers.pull_gpups_sparse(input=slots, size=[11, 35]) """ @@ -526,7 +526,7 @@ def _pull_box_sparse( .. code-block:: python import paddle.fluid as fluid - data = fluid.layers.data(name='sequence', shape=[1], dtype='int64', lod_level=1) + data = paddle.static.data(name='sequence', shape=[-1,1], dtype='int64', lod_level=1) emb = fluid.layers.pull_box_sparse(input=data, size=[11]) """ helper = LayerHelper('pull_box_sparse', **locals()) @@ -711,7 +711,7 @@ def unsqueeze(input, axes, name=None): .. 
code-block:: python import paddle.fluid as fluid - x = fluid.layers.data(name='x', shape=[5, 10]) + x = paddle.static.data(name='x', shape=[-1, 5, 10], dtype="float32") y = fluid.layers.unsqueeze(input=x, axes=[1]) """ diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 1bab4f34317719ae4d806a5ca73bd4ac7ae86ed8..c5aa80c74902732c20956e3d8265138381e7b205 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -893,11 +893,18 @@ class Optimizer: self._create_global_learning_rate() if in_dygraph_mode(): - for param_and_grad in parameters_and_grads: - if param_and_grad[1] is None: - continue - if param_and_grad[0].trainable is True: - self._append_optimize_op(target_block, param_and_grad) + found_inf = self._get_auxiliary_var('found_inf') + if found_inf: + if isinstance(found_inf, core.eager.Tensor): + self._set_auxiliary_var('found_inf', True) + else: + if isinstance(found_inf, core.eager.Tensor): + self._set_auxiliary_var('found_inf', False) + for param_and_grad in parameters_and_grads: + if param_and_grad[1] is None: + continue + if param_and_grad[0].trainable is True: + self._append_optimize_op(target_block, param_and_grad) else: for param_and_grad in parameters_and_grads: if param_and_grad[1] is None: @@ -1431,8 +1438,8 @@ class SGDOptimizer(Optimizer): place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) @@ -1623,8 +1630,8 @@ class MomentumOptimizer(Optimizer): place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) @@ -1772,8 +1779,8 @@ class LarsMomentumOptimizer(Optimizer): paddle.enable_static() np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) - inp = fluid.layers.data( - name="inp", shape=[2, 2], append_batch_size=False) + inp = paddle.static.data( + name="inp", shape=[2, 2], dtype='float32') out = paddle.static.nn.fc(inp, size=3) out = paddle.sum(out) optimizer = fluid.optimizer.LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9) @@ -2764,7 +2771,7 @@ class DpsgdOptimizer(Optimizer): train_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): - data = fluid.layers.data(name='X', shape=[1], dtype='float32') + data = paddle.static.data(name='X', shape=[-1,1], dtype='float32') hidden = paddle.static.nn.fc(x=data, size=10) loss = paddle.mean(hidden) optimizer = fluid.optimizer.Dpsgd(learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0) @@ -3217,8 +3224,8 @@ class RMSPropOptimizer(Optimizer): place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = 
fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) @@ -3415,8 +3422,8 @@ class FtrlOptimizer(Optimizer): place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) @@ -4354,11 +4361,12 @@ class PipelineOptimizer: import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers + import numpy as np paddle.enable_static() with fluid.device_guard("gpu:0"): - x = fluid.layers.data(name='x', shape=[1], dtype='int64', lod_level=0) - y = fluid.layers.data(name='y', shape=[1], dtype='int64', lod_level=0) + x = paddle.static.data(name='x', shape=[-1, 1], dtype='int64', lod_level=0) + y = paddle.static.data(name='y', shape=[-1, 1], dtype='int64', lod_level=0) data_loader = fluid.io.DataLoader.from_generator( feed_list=[x, y], capacity=64, @@ -6332,8 +6340,8 @@ class RecomputeOptimizer(Optimizer): ) sum_cost = paddle.mean(cost) return sum_cost, fc_1, prediction - input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') - input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1,32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1,1], dtype='int64') cost, fc_1, pred = mlp(input_x, input_y) sgd = fluid.optimizer.Adam(learning_rate=0.01) @@ -6410,8 +6418,8 @@ class RecomputeOptimizer(Optimizer): sum_cost = paddle.mean(cost) return sum_cost, fc_1, prediction - input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') - input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1,32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1,1], dtype='int64') cost, fc_1, pred = mlp(input_x, input_y) print("Finished FF") @@ -6458,8 +6466,8 @@ class RecomputeOptimizer(Optimizer): return sum_cost, fc_1, prediction - input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') - input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1,32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1,1], dtype='int64') cost, fc_1, pred = mlp(input_x, input_y) print("Finished FF") @@ -6952,8 +6960,8 @@ class RecomputeOptimizer(Optimizer): return sum_cost, fc_1, prediction - input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') - input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1,32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1,1], dtype='int64') cost, fc_1, pred = mlp(input_x, input_y) print("Finished FF") @@ -7033,8 +7041,8 @@ class RecomputeOptimizer(Optimizer): sum_cost = paddle.mean(cost) return sum_cost, fc_1, prediction - input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') - input_y = fluid.layers.data(name="y", 
shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1,32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1,1], dtype='int64') cost, fc_1, pred = mlp(input_x, input_y) print("Finished FF") @@ -7120,8 +7128,8 @@ class LookaheadOptimizer: paddle.enable_static() - x = fluid.layers.data(name='x', shape=[2], dtype='float32') - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + x = paddle.static.data(name='x', shape=[-1,2], dtype='float32') + label = paddle.static.data(name="label", shape=[-1,1], dtype="int64") y = paddle.static.nn.fc(x=[x], size=2, activation="softmax") loss = paddle.nn.functional.cross_entropy( input=y, label=label, @@ -7311,8 +7319,8 @@ class GradientMergeOptimizer: sum_cost = paddle.mean(cost) return sum_cost, fc_1, prediction - input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') - input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1,32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1,1], dtype='int64') cost, fc_1, pred = mlp(input_x, input_y) sgd = fluid.optimizer.Adam(learning_rate=0.01) sgd = fluid.optimizer.GradientMergeOptimizer(sgd, k_steps=4, avg=True) diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index e6cabbdde92683eaaa19210bbad5f373bf420b24..8e36dd28005be7cf0c21423ae93d8e6a62c2d88e 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -1643,7 +1643,7 @@ class PyReader(DataLoaderBase): Args: feed_list (list(Variable)|tuple(Variable)): feed variable list. - The variables should be created by :code:`fluid.layers.data()`. + The variables should be created by :code:`paddle.static.data()`. capacity (int): capacity of the queue maintained in PyReader. The unit is batch number. Set larger capacity if your reader is fast. 
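Every hunk in this stretch applies the same migration: `fluid.layers.data`, which prepended the batch dimension implicitly via `append_batch_size`, is replaced by `paddle.static.data`, where the variable-size batch dimension is written explicitly as -1 in `shape`. A minimal, self-contained sketch of the new style follows; names and sizes are illustrative and not taken from any single hunk.

    import numpy as np
    import paddle

    paddle.enable_static()

    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    with paddle.static.program_guard(main_prog, startup_prog):
        # old style: fluid.layers.data(name='x', shape=[13], dtype='float32')
        # new style: the batch dimension is explicit (-1 = any batch size)
        x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32')
        y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32')
        pred = paddle.static.nn.fc(x, size=1)
        loss = paddle.mean(
            paddle.nn.functional.square_error_cost(input=pred, label=y)
        )

    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(startup_prog)
    # Any batch size is accepted at run time because the leading dim is -1.
    (loss_val,) = exe.run(
        main_prog,
        feed={
            'x': np.random.rand(4, 13).astype('float32'),
            'y': np.random.rand(4, 1).astype('float32'),
        },
        fetch_list=[loss],
    )

For LoD/sequence inputs the hunks keep `lod_level` unchanged; only the explicit shape and the removal of `append_batch_size` differ from the old calls.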
diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py
index 36e648e3e38edc6d733f7d9952519a6189db104b..84bfa351c962db527c72d68dd77ad89b4e855336 100644
--- a/python/paddle/fluid/regularizer.py
+++ b/python/paddle/fluid/regularizer.py
@@ -74,8 +74,8 @@ class L2DecayRegularizer(WeightDecayRegularizer):
     main_prog = fluid.Program()
     startup_prog = fluid.Program()
     with fluid.program_guard(main_prog, startup_prog):
-        data = fluid.layers.data(name='image', shape=[3, 28, 28], dtype='float32')
-        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        data = paddle.static.data(name='image', shape=[-1, 3, 28, 28], dtype='float32')
+        label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')
         hidden = paddle.static.nn.fc(x=data, size=128, activation='relu')
         prediction = paddle.static.nn.fc(x=hidden, size=10, activation='softmax')
         loss = paddle.nn.functional.cross_entropy(
@@ -193,8 +193,8 @@ class L1DecayRegularizer(WeightDecayRegularizer):
     main_prog = fluid.Program()
     startup_prog = fluid.Program()
     with fluid.program_guard(main_prog, startup_prog):
-        data = fluid.layers.data(name='image', shape=[3, 28, 28], dtype='float32')
-        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        data = paddle.static.data(name='image', shape=[-1, 3, 28, 28], dtype='float32')
+        label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')
         hidden = paddle.static.nn.fc(x=data, size=128, activation='relu')
         prediction = paddle.static.nn.fc(x=hidden, size=10, activation='softmax')
         loss = paddle.nn.functional.cross_entropy(
diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py
index c406fae5f811c8edf9dc45209f2e576663e35832..0eb09b210bd02962f75b48a891ac28a0c6610d72 100644
--- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py
+++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py
@@ -68,10 +68,10 @@ def train(
     dict_dim = len(word_dict)
     class_dim = 2

-    data = fluid.layers.data(
-        name="words", shape=[1], dtype="int64", lod_level=1
+    data = paddle.static.data(
+        name="words", shape=[-1, 1], dtype="int64", lod_level=1
     )
-    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64")

     if not parallel:
         cost, acc_out, prediction = net_method(
diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py
index b6efc9775efa4572afd1706441ee085345b74aef..c7c930fd53b5880eb5c83fff5777ac9d1e7db5d2 100644
--- a/python/paddle/fluid/tests/book/test_fit_a_line.py
+++ b/python/paddle/fluid/tests/book/test_fit_a_line.py
@@ -49,8 +49,10 @@ def convert_float_to_uint16(in_list):

 def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16):
-    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
-    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+    x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32')
+    x.desc.set_need_check_feed(False)
+    y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32')
+    y.desc.set_need_check_feed(False)

     if use_bf16:
         if not pure_bf16:
diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py
index f1fa47afb809ab3e247cf15d7d2d9379b2185340..6ee33951583a1cf2be7acaac7fa49d71c71a5c49 100644
--- a/python/paddle/fluid/tests/book/test_image_classification.py
+++
b/python/paddle/fluid/tests/book/test_image_classification.py @@ -104,8 +104,10 @@ def train(net_type, use_cuda, save_dirname, is_local): classdim = 10 data_shape = [3, 32, 32] - images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + images = paddle.static.data( + name='pixel', shape=[-1] + data_shape, dtype='float32' + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') if net_type == "vgg": print("train vgg net") diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index 85d946e2185846b44fcd852842343eab04020680..5d6939af8902fdca51b6b4486788269194524b6b 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -77,8 +77,8 @@ def train( ): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data(name='img', shape=[-1, 1, 28, 28], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') if nn_type == 'mlp': net_conf = mlp diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index aad9e7ce01c2c0dc654fcd7cfc09d3521c4aa1bb..1564b0594f2a12b25378ccab748bd6230ca805e6 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -40,7 +40,7 @@ def get_usr_combined_features(): USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 - uid = layers.data(name='user_id', shape=[1], dtype='int64') + uid = paddle.static.data(name='user_id', shape=[-1, 1], dtype='int64') usr_emb = layers.embedding( input=uid, @@ -54,7 +54,9 @@ def get_usr_combined_features(): USR_GENDER_DICT_SIZE = 2 - usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') + usr_gender_id = paddle.static.data( + name='gender_id', shape=[-1, 1], dtype='int64' + ) usr_gender_emb = layers.embedding( input=usr_gender_id, @@ -66,7 +68,7 @@ def get_usr_combined_features(): usr_gender_fc = paddle.static.nn.fc(x=usr_gender_emb, size=16) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) - usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") + usr_age_id = paddle.static.data(name='age_id', shape=[-1, 1], dtype="int64") usr_age_emb = layers.embedding( input=usr_age_id, @@ -78,7 +80,7 @@ def get_usr_combined_features(): usr_age_fc = paddle.static.nn.fc(x=usr_age_emb, size=16) USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 - usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") + usr_job_id = paddle.static.data(name='job_id', shape=[-1, 1], dtype="int64") usr_job_emb = layers.embedding( input=usr_job_id, @@ -104,7 +106,7 @@ def get_mov_combined_features(): MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 - mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') + mov_id = paddle.static.data(name='movie_id', shape=[-1, 1], dtype='int64') mov_emb = layers.embedding( input=mov_id, @@ -118,8 +120,8 @@ def get_mov_combined_features(): CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) - category_id = layers.data( - name='category_id', shape=[1], dtype='int64', lod_level=1 + category_id = paddle.static.data( + name='category_id', 
shape=[-1, 1], dtype='int64', lod_level=1 ) mov_categories_emb = layers.embedding( @@ -132,8 +134,8 @@ def get_mov_combined_features(): MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) - mov_title_id = layers.data( - name='movie_title', shape=[1], dtype='int64', lod_level=1 + mov_title_id = paddle.static.data( + name='movie_title', shape=[-1, 1], dtype='int64', lod_level=1 ) mov_title_emb = layers.embedding( @@ -170,7 +172,7 @@ def model(): ) scale_infer = paddle.scale(x=inference, scale=5.0) - label = layers.data(name='score', shape=[1], dtype='float32') + label = paddle.static.data(name='score', shape=[-1, 1], dtype='float32') square_cost = paddle.nn.functional.square_error_cost( input=scale_infer, label=label ) diff --git a/python/paddle/fluid/tests/book/test_word2vec_book.py b/python/paddle/fluid/tests/book/test_word2vec_book.py index e932394e8cc0952f417d2d155c170c9d0d3a01d2..f117c523a74444570630b6ed06763c906ea87b2a 100644 --- a/python/paddle/fluid/tests/book/test_word2vec_book.py +++ b/python/paddle/fluid/tests/book/test_word2vec_book.py @@ -108,11 +108,13 @@ def train( word_dict = paddle.dataset.imikolov.build_dict() dict_size = len(word_dict) - first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') - second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64') - third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') - forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') - next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') + first_word = paddle.static.data(name='firstw', shape=[-1, 1], dtype='int64') + second_word = paddle.static.data( + name='secondw', shape=[-1, 1], dtype='int64' + ) + third_word = paddle.static.data(name='thirdw', shape=[-1, 1], dtype='int64') + forth_word = paddle.static.data(name='forthw', shape=[-1, 1], dtype='int64') + next_word = paddle.static.data(name='nextw', shape=[-1, 1], dtype='int64') if not is_parallel: avg_cost, predict_word = __network__( diff --git a/python/paddle/fluid/tests/test_data_feeder.py b/python/paddle/fluid/tests/test_data_feeder.py index 4cbdba6419501cdba3ec671eb055bd04f4e1537c..517d874f4544efdb6023d9d94715b6b8ce52c64d 100644 --- a/python/paddle/fluid/tests/test_data_feeder.py +++ b/python/paddle/fluid/tests/test_data_feeder.py @@ -22,8 +22,8 @@ paddle.enable_static() class TestDataFeeder(unittest.TestCase): def test_lod_level_0_converter(self): - img = fluid.layers.data(name='image', shape=[1, 28, 28]) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data(name='image', shape=[-1, 1, 28, 28]) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') feeder = fluid.DataFeeder([img, label], fluid.CPUPlace()) result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])]) @@ -41,10 +41,10 @@ class TestDataFeeder(unittest.TestCase): def test_lod_level_1_converter(self): # lod_level = 1 # each sentence has a different number of words - sentences = fluid.layers.data( - name='sentences', shape=[1], dtype='int64', lod_level=1 + sentences = paddle.static.data( + name='sentences', shape=[-1, 1], dtype='int64', lod_level=1 ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') feeder = fluid.DataFeeder([sentences, label], fluid.CPUPlace()) # lod = [[0, 3, 5, 9]] @@ -64,10 +64,10 @@ class TestDataFeeder(unittest.TestCase): def test_lod_level_2_converter(self): # lod_level = 2 # paragraphs -> sentences -> 
words - paragraphs = fluid.layers.data( - name='paragraphs', shape=[1], dtype='int64', lod_level=2 + paragraphs = paddle.static.data( + name='paragraphs', shape=[-1, 1], dtype='int64', lod_level=2 ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') feeder = fluid.DataFeeder([paragraphs, label], fluid.CPUPlace()) # lod = [[0, 2, 3], [0, 3, 5, 9]] diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index aa97a320549290b21a7c54c506d0ee966793b307..39b17cd0634be23bd81342e178087bb1815dbfe6 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -20,7 +20,6 @@ from unittests.test_imperative_base import new_program_scope import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core from paddle.fluid.dygraph import base from paddle.fluid.framework import Program, program_guard @@ -154,10 +153,12 @@ class TestMulticlassNMS2(unittest.TestCase): def test_multiclass_nms2(self): program = Program() with program_guard(program): - bboxes = layers.data( + bboxes = paddle.static.data( name='bboxes', shape=[-1, 10, 4], dtype='float32' ) - scores = layers.data(name='scores', shape=[-1, 10], dtype='float32') + scores = paddle.static.data( + name='scores', shape=[-1, 10], dtype='float32' + ) output = fluid.contrib.multiclass_nms2( bboxes, scores, 0.3, 400, 200, 0.7 ) diff --git a/python/paddle/fluid/tests/test_error_clip.py b/python/paddle/fluid/tests/test_error_clip.py index 9dc0771a39fb868792f17dd5497ee7031804147e..0c1d8614938cd4f5e97ae132966f7f33e13009df 100644 --- a/python/paddle/fluid/tests/test_error_clip.py +++ b/python/paddle/fluid/tests/test_error_clip.py @@ -23,13 +23,13 @@ paddle.enable_static() prog = fluid.framework.Program() with fluid.program_guard(main_program=prog): - image = fluid.layers.data(name='x', shape=[784], dtype='float32') + image = paddle.static.data(name='x', shape=[-1, 784], dtype='float32') hidden1 = paddle.static.nn.fc(x=image, size=128, activation='relu') hidden2 = paddle.static.nn.fc(x=hidden1, size=64, activation='relu') predict = paddle.static.nn.fc(x=hidden2, size=10, activation='softmax') - label = fluid.layers.data(name='y', shape=[1], dtype='int64') + label = paddle.static.data(name='y', shape=[-1, 1], dtype='int64') cost = paddle.nn.functional.cross_entropy( input=predict, label=label, reduction='none', use_softmax=False diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 2eea2070befe39a230c4ac77306983abe0be4ca9..521b8d79885bae3a023048f8dfdbbc720b99a155 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -100,6 +100,7 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) list(REMOVE_ITEM TEST_OPS test_fleet_executor_task_node) list(REMOVE_ITEM TEST_OPS test_fleet_exe_dist_model_run) list(REMOVE_ITEM TEST_OPS test_fleet_exe_dist_model_tensor) + list(REMOVE_ITEM TEST_OPS test_fleet_executor_cond_interceptor) endif() list(REMOVE_ITEM TEST_OPS test_deprecated_decorator) diff --git a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py index 551a9eed8baa8f4795801a01c70e8363c2e9830e..954e06e2a81f05d654fc19b41cb1ec8342760c22 100644 --- a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py +++ 
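(Illustrative note, not part of the patch.) For the LoD / variable-length inputs touched above (the recommender-system and DataFeeder tests), the lod_level argument carries over unchanged; only the explicit -1 batch dimension is new. A sketch:

import paddle

paddle.enable_static()

sentences = paddle.static.data(
    name='sentences', shape=[-1, 1], dtype='int64', lod_level=1
)
label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')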
b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py @@ -46,8 +46,8 @@ def generator(): def net(): - x = fluid.layers.data(name="x", shape=[3], dtype='float32') - y = fluid.layers.data(name="y", shape=[1], dtype='int64') + x = paddle.static.data(name="x", shape=[-1, 3], dtype='float32') + y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') # test int64 value zero = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0) diff --git a/python/paddle/fluid/tests/unittests/collective/collective_allgather_api.py b/python/paddle/fluid/tests/unittests/collective/collective_allgather_api.py index 72e28c21f250c11e74522b55a92d89db5d3891cd..6eb1f562860baee24948ad259ebd3eec15525c20 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_allgather_api.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_allgather_api.py @@ -20,7 +20,6 @@ import test_collective_api_base as test_base import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers paddle.enable_static() @@ -33,7 +32,9 @@ class TestCollectiveAllgatherAPI(test_base.TestCollectiveAPIRunnerBase): dtype = "float32" if dtype is None else dtype with fluid.program_guard(main_prog, startup_program): tensor_list = [] - tindata = layers.data(name="tindata", shape=[10, 1000], dtype=dtype) + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype=dtype + ) paddle.distributed.all_gather(tensor_list, tindata) return tensor_list diff --git a/python/paddle/fluid/tests/unittests/collective/collective_allreduce_api.py b/python/paddle/fluid/tests/unittests/collective/collective_allreduce_api.py index e995cd8d37b6623cbb6f7ef2739d21b545e8a46d..291ad384f3e66e357661eed02a0c1bd406b4d365 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_allreduce_api.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_allreduce_api.py @@ -16,7 +16,6 @@ from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers paddle.enable_static() @@ -27,8 +26,8 @@ class TestCollectiveAllreduceAPI(TestCollectiveAPIRunnerBase): def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) paddle.distributed.all_reduce(tindata) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/collective/collective_allreduce_new_group_api.py b/python/paddle/fluid/tests/unittests/collective/collective_allreduce_new_group_api.py index ba5580eb28645c78a5ffe3a5ccc189d1aea1cd88..b66fdbedd2e0469a5d6fb8b1452d7bc1c1c8ecd5 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_allreduce_new_group_api.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_allreduce_new_group_api.py @@ -16,7 +16,6 @@ from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers paddle.enable_static() @@ -27,8 +26,8 @@ class TestCollectiveAllreduceNewGroupAPI(TestCollectiveAPIRunnerBase): def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[1, 10, 1000], dtype='float32' ) gp = 
paddle.distributed.new_group([0, 1]) paddle.distributed.all_reduce(tindata, group=gp, sync_op=True) diff --git a/python/paddle/fluid/tests/unittests/collective/collective_allreduce_op.py b/python/paddle/fluid/tests/unittests/collective/collective_allreduce_op.py index 3f3d0e8c0a639767e5972b489ef150b784e6acb4..250156905e024188a871b6a54633e44c19d6238b 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_allreduce_op.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -29,9 +28,10 @@ class TestCollectiveAllreduce(TestCollectiveRunnerBase): def get_model(self, main_prog, startup_program): ring_id = 0 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = main_prog.current_block().create_var( name="outofallreduce", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/collective/collective_allreduce_op_wait.py b/python/paddle/fluid/tests/unittests/collective/collective_allreduce_op_wait.py index 0fce303f784c6d0496e42c902cbbde4dec2085e8..b0d4eeec950993a7b193549eb76fab6b23f40f62 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_allreduce_op_wait.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_allreduce_op_wait.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -29,9 +28,10 @@ class TestCollectiveAllreduce(TestCollectiveRunnerBase): def get_model(self, main_prog, startup_program): ring_id = 0 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = main_prog.current_block().create_var( name="outofallreduce", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/collective/collective_alltoall_api.py b/python/paddle/fluid/tests/unittests/collective/collective_alltoall_api.py index 0bfc3951f9146ede7364b613916481a3d076ffcc..1b5d0082ead7cfeb44c565007f07522e810eef5b 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_alltoall_api.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_alltoall_api.py @@ -16,7 +16,6 @@ from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers paddle.enable_static() @@ -27,9 +26,10 @@ class TestCollectiveAllToAllAPI(TestCollectiveAPIRunnerBase): def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) tindata = paddle.split(tindata, 2, axis=0) tout_data = [] paddle.distributed.alltoall(tindata, tout_data) diff --git 
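(Illustrative note, not part of the patch.) Several collective runners above also add tindata.desc.set_need_check_feed(False) right after declaring the input. The hedged reading: paddle.static.data marks its variable so the executor validates fed arrays against the declared shape and dtype, a check the old fluid.layers.data variables did not carry, and turning it off keeps the permissive feeding behaviour these runners relied on. Sketch of the pairing:

import paddle

paddle.enable_static()

tindata = paddle.static.data(
    name="tindata", shape=[-1, 10, 1000], dtype='float32'
)
# Presumably restores the old, unchecked feed behaviour for this variable.
tindata.desc.set_need_check_feed(False)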
a/python/paddle/fluid/tests/unittests/collective/collective_broadcast_api.py b/python/paddle/fluid/tests/unittests/collective/collective_broadcast_api.py index 046501ca205fdc6fa1c2b99a324e89f8c310db4e..5011a5b79eba692eeb6b44880b50667d04111e96 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_broadcast_api.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_broadcast_api.py @@ -16,7 +16,6 @@ from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers paddle.enable_static() @@ -27,9 +26,10 @@ class TestCollectiveBroadcastAPI(TestCollectiveAPIRunnerBase): def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) paddle.distributed.broadcast(tindata, src=1) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/collective/collective_broadcast_op.py b/python/paddle/fluid/tests/unittests/collective/collective_broadcast_op.py index 8d02dd771bfdbdcb6bc121a7f62411cce6d12ae6..7c49f1f55ebe6975a10ee03809ae98f51337fc1b 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_broadcast_op.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_broadcast_op.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -30,9 +29,10 @@ class TestCollectiveBroadcast(TestCollectiveRunnerBase): ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = main_prog.current_block().create_var( name="outofbroadcast", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/collective/collective_concat_op.py b/python/paddle/fluid/tests/unittests/collective/collective_concat_op.py index dd85905caf288c0f2d315163355713e62527df45..df784d2815eeeae338bb255784f465b21061c105 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_concat_op.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_concat_op.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -30,9 +29,10 @@ class TestCollectiveConcat(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = main_prog.current_block().create_var( name="outofconcat", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/collective/collective_identity_op.py b/python/paddle/fluid/tests/unittests/collective/collective_identity_op.py index c4b993d8a311fb6d3e76769f0fd5a996cfdde0fc..4926076b6b0b3db77cbf527fdb9ee1beff80cca0 100644 --- 
a/python/paddle/fluid/tests/unittests/collective/collective_identity_op.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_identity_op.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -30,9 +29,10 @@ class TestCollectiveIdentity(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = main_prog.current_block().create_var( name="outofgather", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/collective/collective_reduce_api.py b/python/paddle/fluid/tests/unittests/collective/collective_reduce_api.py index fb16088b4b87fafe797038266f59e76804aa3eea..6f033c7d1fdeb24f9b46ee6470d451cf0adfd2fa 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_reduce_api.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_reduce_api.py @@ -16,7 +16,6 @@ from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers paddle.enable_static() @@ -27,9 +26,10 @@ class TestCollectiveReduceAPI(TestCollectiveAPIRunnerBase): def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) paddle.distributed.reduce(tindata, dst=0) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/collective/collective_reduce_op.py b/python/paddle/fluid/tests/unittests/collective/collective_reduce_op.py index 8e75d5ee913020db9c824542db0852a2b0987392..9c340821130910d29bd8aa69376c2305e9b0c50f 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_reduce_op.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -30,9 +29,10 @@ class TestCollectiveReduce(TestCollectiveRunnerBase): ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = main_prog.current_block().create_var( name="outofreduce", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/collective/collective_reduce_op_calc_stream.py b/python/paddle/fluid/tests/unittests/collective/collective_reduce_op_calc_stream.py index 7d122764f5d4da6b9a094faa4bbc9760e36c3774..a4ae2a3623f60bdfcac94f9470418fdccb8b8ed2 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_reduce_op_calc_stream.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_reduce_op_calc_stream.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as 
fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -30,9 +29,11 @@ class TestCollectiveReduce(TestCollectiveRunnerBase): ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) + toutdata = main_prog.current_block().create_var( name="outofreduce", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/collective/collective_scatter_api.py b/python/paddle/fluid/tests/unittests/collective/collective_scatter_api.py index f4671af0dfc7f09ba2fa973c61637860b56ccec6..de7ec2cfcd377d7495a63e41e3a899f763100c57 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_scatter_api.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_scatter_api.py @@ -27,11 +27,10 @@ class TestCollectiveScatterAPI(TestCollectiveAPIRunnerBase): def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( + tindata = paddle.static.data( name="tindata", shape=[10, 1000], dtype='float32', - append_batch_size=False, ) toutdata = layers.fill_constant( shape=[5, 1000], dtype='float32', value=1.0 diff --git a/python/paddle/fluid/tests/unittests/collective/collective_scatter_op.py b/python/paddle/fluid/tests/unittests/collective/collective_scatter_op.py index 798485b638d40359a2937f37ffe5a9aec121ac82..56aa7210d27606b3bd23933c47ba63e7dd280e7b 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_scatter_op.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_scatter_op.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -30,9 +29,10 @@ class TestCollectiveScatter(TestCollectiveRunnerBase): ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = main_prog.current_block().create_var( name="outofreduce", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_api.py b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_api.py index bcdb8342cadd28e80709c623c65b79564dd18d49..e0a2487743d4b51ef0e016b1bd7bf5d1c69f5b7e 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_api.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_api.py @@ -16,7 +16,6 @@ from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers paddle.enable_static() @@ -27,11 +26,10 @@ class TestCollectiveSendRecvAPI(TestCollectiveAPIRunnerBase): def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( + tindata = paddle.static.data( name="tindata", shape=[10, 1000], dtype='float32', - append_batch_size=False, ) if rank == 0: paddle.distributed.send(tindata, dst=1) diff --git a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op.py 
b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op.py index eab498073323e49cbe84871cbf78dea8122c7535..8b2abc74f0aaf064f06ee58fc66df6b4212eb3cd 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers paddle.enable_static() @@ -28,12 +27,12 @@ class TestCollectiveSendRecv(TestCollectiveRunnerBase): def get_model(self, main_prog, startup_program): ring_id = self.global_ring_id with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( + tindata = paddle.static.data( name="tindata", shape=[10, 1000], dtype='float64', - append_batch_size=False, ) + tindata.desc.set_need_check_feed(False) if self.rank == 0: main_prog.global_block().append_op( type="send_v2", diff --git a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_array.py b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_array.py index 8c4ebeaffda80b0d267379751dbf0125decafc83..bea4a71089af42c1412a9e1eacd9a59a067d7db0 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_array.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_array.py @@ -17,7 +17,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers paddle.enable_static() @@ -29,12 +28,12 @@ class TestCollectiveSendRecv(TestCollectiveRunnerBase): def get_model(self, main_prog, startup_program): ring_id = self.global_ring_id with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( + tindata = paddle.static.data( name="tindata", shape=[10, 1000], dtype='float64', - append_batch_size=False, ) + tindata.desc.set_need_check_feed(False) if self.rank == 0: data1 = fluid.layers.assign( np.array([[0, 1, 2]], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_dynamic_shape.py b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_dynamic_shape.py index 80a0176782c4ec16246baa749b3cdc6b0190aaf7..2f1aaac0b19a1b9f26ea920c3c9e8bae5b336d48 100644 --- a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_dynamic_shape.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_op_dynamic_shape.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers paddle.enable_static() @@ -28,12 +27,12 @@ class TestCollectiveSendRecvDynamicShape(TestCollectiveRunnerBase): def get_model(self, main_prog, startup_program): ring_id = self.global_ring_id with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( + tindata = paddle.static.data( name="tindata", - shape=[10, 1000], + shape=[-1, 10, 1000], dtype='float64', - append_batch_size=False, ) + tindata.desc.set_need_check_feed(False) if self.rank == 0: main_prog.global_block().append_op( type="send_v2", diff --git a/python/paddle/fluid/tests/unittests/collective/collective_split_op.py b/python/paddle/fluid/tests/unittests/collective/collective_split_op.py index 127ab924da4f5bbb42fe15d6cf3e1bff08e47998..3348d4c9c9f29d02f538484021209cd7187c0533 100644 --- 
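(Illustrative note, not part of the patch.) Where the old call already passed append_batch_size=False, as in the scatter and send/recv tests above, the declared shape was taken literally, so the migrated call keeps the shape as-is with no leading -1:

import paddle

paddle.enable_static()

# Before: fluid.layers.data(name="tindata", shape=[10, 1000],
#                           dtype='float32', append_batch_size=False)
tindata = paddle.static.data(name="tindata", shape=[10, 1000], dtype='float32')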
a/python/paddle/fluid/tests/unittests/collective/collective_split_op.py +++ b/python/paddle/fluid/tests/unittests/collective/collective_split_op.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -30,9 +29,10 @@ class TestCollectiveAllGather(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = main_prog.current_block().create_var( name="outofsplit", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dist_mnist_gradient_merge.py b/python/paddle/fluid/tests/unittests/collective/fleet/dist_mnist_gradient_merge.py index 01d6d970c6cd2c8f4f54ce8ede7576fb514f1bc4..35525d03ca7a6ba099a149acff36cce467ff42f1 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dist_mnist_gradient_merge.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dist_mnist_gradient_merge.py @@ -29,8 +29,10 @@ fluid.default_main_program().random_seed = 1 class TestDistMnist2x2(TestDistRunnerBase): def get_model(self, batch_size=2): # Input data - images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') # Train program predict = cnn_model(images) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py index c33e5bc27a0cae34f75c223aa305c8a612041a66..2eb0951756a59eaa601549e3974872110e1ad4a4 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py @@ -85,10 +85,12 @@ class TestDistMnist2x2(TestDistRunnerBase): def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data with fluid.device_guard("gpu:0"): - images = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype=DTYPE + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') if dist_strategy: data_loader = fluid.io.DataLoader.from_generator( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py index 905df8cd6b71f82337c8a50220ab20d66bc8b74a..e094d932d33e48c2a7d51a6c30859cebad6aadf6 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py @@ -85,10 +85,12 @@ class TestDistMnist2x2(TestDistRunnerBase): def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data with fluid.device_guard("gpu:0"): - images = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype=DTYPE + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = 
paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') if dist_strategy: data_loader = fluid.io.DataLoader.from_generator( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py index dbe50789d687c1c92003ee7ac750bbd2cb455a78..7e442f1914b2cf05d6f4f8f71856c02add563991 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py @@ -77,10 +77,12 @@ class TestDistMnist2x2(TestDistRunnerBase): if dist_strategy: fleet.init(is_collective=True) with fluid.device_guard("gpu:0"): - images = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype=DTYPE + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') if dist_strategy: data_loader = fluid.io.DataLoader.from_generator( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py index 21f7b624f5a4dfb0bd24f6cb069f6b723f335c55..6d8decbc3e998647940ba12e26de98f472216fc4 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py @@ -29,9 +29,9 @@ paddle.enable_static() class TestCommunicatorHalfAsyncEnd2End(unittest.TestCase): def net(self): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py index 550fc5db902416281c30095f089cdf19e9fa0b81..cf0babc5877333bf10ca92fd86278a0719b6412a 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py @@ -27,8 +27,8 @@ import paddle.fluid as fluid class TestCommunicator(unittest.TestCase): def net(self): - x = fluid.layers.data(name='x', shape=[1], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) return avg_cost diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py index 3e58391ec976882a05f009c3b93bb031e2cbe30b..085cb293c0e46418fea8cf73850dc257d8e2aca0 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py @@ -270,8 +270,8 
@@ class TestHalfAsyncStrategy(unittest.TestCase): class TestDebugInfo(unittest.TestCase): def test_debug_info(self): - x = fluid.layers.data(name='x', shape=[1], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer.py index 0e5ae267f32af44119e0de912b50e22f1e8f3bf7..84dd2e0ee964b5f4dd1c8ca903e12097f583e51b 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_fp16_allreduce_meta_optimizer.py @@ -30,12 +30,8 @@ class TestFleetFP16CompressOptimizer(unittest.TestCase): def net(self, main_prog, startup_prog, dtype='float32'): with fluid.program_guard(main_prog, startup_prog): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype=dtype - ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' - ) + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype=dtype) + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_execution_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_execution_meta_optimizer.py index aa8bc8bd2d73989600def09d8bc830924fa375bd..a36c5a1d74cc08e3e1d290cd38068f2e4fdd0273 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_execution_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_execution_meta_optimizer.py @@ -60,12 +60,10 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): import paddle.distributed.fleet as fleet fleet.init(is_collective=True) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') @@ -126,12 +124,10 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): import paddle.distributed.fleet as fleet fleet.init(is_collective=True) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') @@ -204,12 +200,10 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): import paddle.distributed.fleet as fleet fleet.init(is_collective=True) - input_x = paddle.fluid.layers.data( - 
name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') @@ -269,12 +263,10 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): import paddle.distributed.fleet as fleet fleet.init(is_collective=True) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py index 58091bd847f2c960a58b32da9ffb40b7755d1608..aab4032afbce7efeab1c63b4b6bdc3a885453d5d 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_graph_executor.py @@ -45,12 +45,10 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): def node_func(): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lamb_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lamb_meta_optimizer.py index 2623a222d50b42d2c8c6bd68f8a8e8d3b54faca7..96cf0fffe87be160e51dc4570c94b0cb484d50d5 100755 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lamb_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lamb_meta_optimizer.py @@ -33,11 +33,11 @@ class TestFleetLambMetaOptimizer(unittest.TestCase): def net(self, main_prog, startup_prog): with fluid.program_guard(main_prog, startup_prog): with fluid.unique_name.guard(): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_y = paddle.static.data( + name="y", shape=[-1, 1], dtype='int64' ) fc_1 = paddle.static.nn.fc( @@ -117,10 +117,8 @@ class TestFleetLambMetaOptimizer(unittest.TestCase): def test_lamb_apply_with_amp(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = 
paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lars_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lars_meta_optimizer.py index 509606290144606691a74a93a0877e64c8d374fb..b3094b3b6b0311506328f4d165e6db651bfc5ff1 100755 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lars_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_lars_meta_optimizer.py @@ -33,11 +33,11 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase): def net(self, main_prog, startup_prog): with fluid.program_guard(main_prog, startup_prog): with fluid.unique_name.guard(): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_y = paddle.static.data( + name="y", shape=[-1, 1], dtype='int64' ) fc_1 = paddle.static.nn.fc( @@ -122,10 +122,8 @@ class TestFleetLarsMetaOptimizer(unittest.TestCase): def test_lars_apply_with_amp(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_meta_optimizer_base.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_meta_optimizer_base.py index 76fb129a0494617e3e7c04018756aa88891088c6..3551c89a2601487b92095ae16d1a3d16b063801a 100755 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_meta_optimizer_base.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_meta_optimizer_base.py @@ -29,11 +29,11 @@ class TestFleetMetaOptimizerBase(unittest.TestCase): with fluid.unique_name.guard(): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_y = paddle.static.data( + name="y", shape=[-1, 1], dtype='int64' ) fc_1 = paddle.static.nn.fc( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer.py index 8fac45b9d249a951a68050759d4821868d7ae5da..ddba6331998264964ad94024d73e6b6b2a14296f 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer.py @@ -33,14 +33,12 @@ class TestFleetMetaOptimizer(unittest.TestCase): def net(self): with static.device_guard("gpu:0"): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], 
dtype='int64' - ) - input_z = paddle.fluid.layers.data( - name="z", shape=[1], dtype="float32" + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') + input_z = paddle.static.data( + name="z", shape=[-1, 1], dtype="float32" ) with static.device_guard("gpu:all"): input_z = input_z * 1.0 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer_with_recompute.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer_with_recompute.py index 3f22238a3637c474dc224f01fd77c0e262d3f76b..7c80ad98597aa81b101075cbc3f7eefc1ad5a3d9 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer_with_recompute.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_pipeline_meta_optimizer_with_recompute.py @@ -34,12 +34,10 @@ class TestFleetMetaOptimizer(unittest.TestCase): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) with paddle.fluid.device_guard("gpu:0"): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') fc_3 = paddle.static.nn.fc(x=fc_2, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_raw_program_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_raw_program_meta_optimizer.py index 29ef57518d0c5308071f95b6b2135f4c4269796d..62b3ba99eaf8d5c2e9517b2651b06604b8078e06 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_raw_program_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_raw_program_meta_optimizer.py @@ -33,10 +33,8 @@ class TestFleetMetaOptimizer(unittest.TestCase): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py index 196e3425ec65a8c7ce5f0e623bd6042ea8f79926..7e687845944647110049b60937d96ae7f3edbb9c 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py @@ -443,9 +443,9 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp def net(): - x = paddle.fluid.layers.data(name='x', shape=[13], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) - y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = 
paddle.nn.functional.square_error_cost( input=y_predict, label=y ) diff --git a/python/paddle/fluid/tests/unittests/collective_allgather_op.py b/python/paddle/fluid/tests/unittests/collective_allgather_op.py index e877aaae9bd8f143041e0938dadc1a2f357842ff..5fd37ebe93878c24e7ebecf713e2adc2a8d51bad 100644 --- a/python/paddle/fluid/tests/unittests/collective_allgather_op.py +++ b/python/paddle/fluid/tests/unittests/collective_allgather_op.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -30,9 +29,10 @@ class TestCollectiveAllGather(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = main_prog.current_block().create_var( name="outofgather", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/collective_reducescatter.py b/python/paddle/fluid/tests/unittests/collective_reducescatter.py index 0852f7b2f36628d00bc390f05eb062d7ef7a0abb..9813553295fbcdee182d5948c2647f2ebce91451 100644 --- a/python/paddle/fluid/tests/unittests/collective_reducescatter.py +++ b/python/paddle/fluid/tests/unittests/collective_reducescatter.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers paddle.enable_static() @@ -29,9 +28,10 @@ class TestCollectiveReduceScatter(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = fluid.layers.collective._c_reducescatter(tindata, nranks) toutdata = fluid.layers.collective._c_sync_comm_stream(toutdata, 0) return toutdata diff --git a/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py b/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py index d89701e6a4a802e3b57841c1f5ce85dc6b4016b5..92c9ec3b6ed6ad5db5b1f3e03929e368755b6f70 100644 --- a/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py +++ b/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py @@ -16,7 +16,6 @@ from test_collective_base import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -30,9 +29,10 @@ class TestCollectiveReduceScatter(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = main_prog.current_block().create_var( name="outofrs", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/composite_ops/test_composite_softmax.py b/python/paddle/fluid/tests/unittests/composite_ops/test_composite_softmax.py index c7c876b8f8fea6c79bfdf91c95e4dc16d7cefb93..fd54850b2cb6f2a55d18f28c4644b7ea4b859fd1 100644 --- 
a/python/paddle/fluid/tests/unittests/composite_ops/test_composite_softmax.py +++ b/python/paddle/fluid/tests/unittests/composite_ops/test_composite_softmax.py @@ -19,6 +19,7 @@ from utils import TOLERANCE import paddle import paddle.nn.functional as F +from paddle.fluid import core def generate_data(shape, dtype="float32"): @@ -72,6 +73,7 @@ class TestCompositeSoftmax(unittest.TestCase): def cal_composite(self, inputs): paddle.enable_static() + core._set_prim_forward_enabled(True) startup_program = paddle.static.Program() main_program = paddle.static.Program() with paddle.static.program_guard(main_program, startup_program): @@ -95,6 +97,7 @@ class TestCompositeSoftmax(unittest.TestCase): exe.run(startup_program) res = exe.run(main_program, feed={'x': inputs}, fetch_list=[y]) paddle.disable_static() + core._set_prim_forward_enabled(False) return res def compare_forward(self): diff --git a/python/paddle/fluid/tests/unittests/composite_ops/test_composite_softmax_grad.py b/python/paddle/fluid/tests/unittests/composite_ops/test_composite_softmax_grad.py index 808c5f8324b65a87efa5c46005c553f5f58703fb..9b6e5db7953565c1289800a550e7b9dca7e9b399 100644 --- a/python/paddle/fluid/tests/unittests/composite_ops/test_composite_softmax_grad.py +++ b/python/paddle/fluid/tests/unittests/composite_ops/test_composite_softmax_grad.py @@ -78,6 +78,7 @@ class TestCompositeSoftmax(unittest.TestCase): def cal_composite_grad(self, inputs): paddle.enable_static() + core._set_prim_all_enabled(True) startup_program = paddle.static.Program() main_program = paddle.static.Program() with paddle.static.program_guard(main_program, startup_program): @@ -108,6 +109,7 @@ class TestCompositeSoftmax(unittest.TestCase): exe.run(startup_program) res = exe.run(main_program, feed={'x': inputs}, fetch_list=[z]) paddle.disable_static() + core._set_prim_all_enabled(False) return res def compare_backward(self): @@ -139,7 +141,7 @@ class TestCompositeSoftmaxPrimBackward(unittest.TestCase): "test composite softmax and prim backward" def setUp(self): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) self.dtypes = ["float32"] self.shapes = [[2, 3, 4], [2, 3]] self.axes = [-1, 0, 1] diff --git a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py index 70d06a95b64aaeee9b95a0a6b06966fc45d47328..30bcea4cb5cb2344f3d50e3d10d1951dbedabc71 100644 --- a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py @@ -72,8 +72,10 @@ def cnn_model(data): class TestDistMnist2x2(TestDistRunnerBase): def get_model(self, batch_size=2, single_device=False): # Input data - images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') # Train program predict = cnn_model(images) diff --git a/python/paddle/fluid/tests/unittests/dist_ctr.py b/python/paddle/fluid/tests/unittests/dist_ctr.py index 7fd86cadb99eb733c5f37d33725b42e4568282e0..deb4cb921c1f3a4809e3d97dd1d7a0b0a78d5251 100644 --- a/python/paddle/fluid/tests/unittests/dist_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_ctr.py @@ -33,26 +33,23 @@ class TestDistCTR2x2(TestDistRunnerBase): dnn_input_dim, lr_input_dim = dist_ctr_reader.load_data_meta() """ network definition """ - dnn_data = fluid.layers.data( + dnn_data = 
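(Illustrative note, not part of the patch.) The composite softmax tests above now scope the prim lowering flags around each static program instead of the previous core.set_prim_enabled call: _set_prim_forward_enabled for the forward-only case, _set_prim_all_enabled for forward plus backward, and _set_prim_backward_enabled in setUp for the prim-backward test, with the per-program flags reset to False after the run. One way to sketch that discipline (run_composite and build_and_run are hypothetical names, not from the patch):

import paddle
from paddle.fluid import core

def run_composite(build_and_run):
    # Enable composite (prim) forward lowering only around this program, then
    # restore the flag so later tests see the default behaviour.
    paddle.enable_static()
    core._set_prim_forward_enabled(True)
    try:
        return build_and_run()
    finally:
        core._set_prim_forward_enabled(False)
        paddle.disable_static()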
paddle.static.data( name="dnn_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - lr_data = fluid.layers.data( + lr_data = paddle.static.data( name="lr_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - label = fluid.layers.data( + label = paddle.static.data( name="click", shape=[-1, 1], dtype="int64", lod_level=0, - append_batch_size=False, ) # build dnn model diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index 360cad434096be3e4705dc9b8367f3c9f6089bf6..8e9341f9c5b1a6b27d9e7c447e35cddcca71e13a 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -62,26 +62,23 @@ class TestDistCTR2x2(FleetDistRunnerBase): """ dnn_input_dim, lr_input_dim = int(1e5), int(1e5) - dnn_data = fluid.layers.data( + dnn_data = paddle.static.data( name="dnn_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - lr_data = fluid.layers.data( + lr_data = paddle.static.data( name="lr_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - label = fluid.layers.data( + label = paddle.static.data( name="click", shape=[-1, 1], dtype="int64", lod_level=0, - append_batch_size=False, ) datas = [dnn_data, lr_data, label] diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py index 8d4efa8c3d11629a49c034b46dab1fb21f9071cd..3e71a1cb6054d6a9a70f6e0c26e7ca4da67a4711 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py @@ -49,26 +49,23 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase): dnn_input_dim, lr_input_dim = int(1e5), int(1e5) with fluid.device_guard("cpu"): - dnn_data = fluid.layers.data( + dnn_data = paddle.static.data( name="dnn_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - lr_data = fluid.layers.data( + lr_data = paddle.static.data( name="lr_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - label = fluid.layers.data( + label = paddle.static.data( name="click", shape=[-1, 1], dtype="float32", lod_level=0, - append_batch_size=False, ) datas = [dnn_data, lr_data, label] diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py index 539446b6741b3cff2e6359587671f69c42b50f4c..dc0a7022b34348d7ee6aa40e2c89598cdc7ea0a3 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py @@ -74,8 +74,10 @@ def cnn_model(data): class TestFleetMetaOptimizerPrecision(TestDistRunnerBase): def get_model(self, batch_size=2, single_device=False): # Input data - images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') # Train program predict = cnn_model(images) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py index 
efad598c725a44c2bf5edf4ab2eb5d4df99d50cd..ac1a4c632fd495d40e905d7ac295f30d8d8fd7e5 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py @@ -74,8 +74,10 @@ def cnn_model(data): class TestFleetMetaOptimizerFuseAllReducePrecision(TestDistRunnerBase): def get_model(self, batch_size=2, single_device=False): # Input data - images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') # Train program predict = cnn_model(images) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index 358b3b5e39a5f747ca9856d22addb229990af931..b673bfeae16e2563333ac9e77d9f5d087b2fb734 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -93,18 +93,18 @@ def train_network( is_pyreader=False, ): # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1 + q = paddle.static.data( + name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1 + pt = paddle.static.data( + name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1 + nt = paddle.static.data( + name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) datas = [q, label, pt, nt] diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py index 9e398e83b90c48b6be430b642963615d5f770fae..a9a2d7be0ba413c337b6e2cd545f5e8e36764102 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py @@ -52,26 +52,23 @@ class TestDistCTR2x2(FleetDistRunnerBase): """ dnn_input_dim, lr_input_dim = 10, 10 - dnn_data = fluid.layers.data( + dnn_data = paddle.static.data( name="dnn_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - lr_data = fluid.layers.data( + lr_data = paddle.static.data( name="lr_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - label = fluid.layers.data( + label = paddle.static.data( name="click", shape=[-1, 1], dtype="int64", lod_level=0, - append_batch_size=False, ) datas = [dnn_data, lr_data, label] diff --git a/python/paddle/fluid/tests/unittests/dist_mnist.py b/python/paddle/fluid/tests/unittests/dist_mnist.py index 117e178dd9670becda56bcae455f9426de42a9ae..87eb22dceac1ccca81765b83f43abb29060d0229 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist.py @@ -73,8 +73,10 @@ def cnn_model(data): class TestDistMnist2x2(TestDistRunnerBase): def get_model(self, batch_size=2, use_dgc=False, dist_strategy=None): # Input data - images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) - label = 
fluid.layers.data(name='label', shape=[1], dtype='int64') + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') # Train program predict = cnn_model(images) diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py b/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py index cac46996edd4a35a98addcbffd8506f89b1c709b..e38d2176bc18d3a8bfb3dd41d8fd4b5e57a118db 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py @@ -38,8 +38,10 @@ def test_merge_reader(repeat_batch_size=8): class TestDistMnist2x2(TestDistRunnerBase): def get_model(self, batch_size=2): # Input data - images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') # Train program predict = cnn_model(images) diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py b/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py index e4af13fa89b066e02a9f026f3d6a11223a5536c1..9aa662854274f9b473142fbc0f85c5baa78156c1 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py @@ -32,8 +32,10 @@ fluid.default_main_program().random_seed = 1 class TestDistMnist2x2(TestDistRunnerBase): def get_model(self, batch_size=2): # Input data - images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') # Train program predict = cnn_model(images) diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_lars.py b/python/paddle/fluid/tests/unittests/dist_mnist_lars.py index dc2fc10450bf6e6532e68926d7851f888ad4f1d6..b1f5e0f2e8f3499d87a3e3f0d8aa72efdecac616 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist_lars.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist_lars.py @@ -29,8 +29,10 @@ fluid.default_main_program().random_seed = 1 class TestDistMnist2x2(TestDistRunnerBase): def get_model(self, batch_size=2): # Input data - images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') # Train program predict = cnn_model(images) diff --git a/python/paddle/fluid/tests/unittests/dist_se_resnext.py b/python/paddle/fluid/tests/unittests/dist_se_resnext.py index 377ad3072277dfda398d255c6ba31089700a679c..db3318d67d88ab8f629f12e42c3734f54fa0f19b 100644 --- a/python/paddle/fluid/tests/unittests/dist_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/dist_se_resnext.py @@ -209,10 +209,10 @@ class SE_ResNeXt: class DistSeResneXt2x2(TestDistRunnerBase): def get_model(self, batch_size=2, use_dgc=False): # Input data - image = fluid.layers.data( - name="data", shape=[3, 224, 224], dtype='float32' + image = paddle.static.data( + name="data", shape=[-1, 3, 224, 224], dtype='float32' ) - label = 
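The recurring change across these dist_* test programs is the move from fluid.layers.data to paddle.static.data; a minimal before/after sketch of that migration (the old call is kept only as a comment for contrast):

import paddle

paddle.enable_static()

# Old style (removed above): the batch dimension was appended implicitly.
#   images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype='float32')
# New style: the batch dimension is written explicitly as -1 and there is no
# append_batch_size argument; lod_level is still accepted where needed.
images = paddle.static.data(name='pixel', shape=[-1, 1, 28, 28], dtype='float32')
label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')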
fluid.layers.data(name="int64", shape=[1], dtype='int64') + label = paddle.static.data(name="int64", shape=[-1, 1], dtype='int64') # Train program model = SE_ResNeXt(layers=50) diff --git a/python/paddle/fluid/tests/unittests/dist_sharding_save.py b/python/paddle/fluid/tests/unittests/dist_sharding_save.py index 1c4f49093df3530cc93a973e223df02be2ff4d9b..c509b2506b08daaa69323e1b5dc846cfaca3c06c 100755 --- a/python/paddle/fluid/tests/unittests/dist_sharding_save.py +++ b/python/paddle/fluid/tests/unittests/dist_sharding_save.py @@ -38,12 +38,10 @@ def runtime_main(): fleet.init(role) with fluid.program_guard(train_prog, startup_prog): with fluid.unique_name.guard(): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=256, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/dist_text_classification.py b/python/paddle/fluid/tests/unittests/dist_text_classification.py index de8630cf70b49ef441adac752ca374a5959b16f1..a287bd8a6c878400ac29e9b15f278450fa5132d3 100644 --- a/python/paddle/fluid/tests/unittests/dist_text_classification.py +++ b/python/paddle/fluid/tests/unittests/dist_text_classification.py @@ -95,8 +95,8 @@ def conv_net( def inference_network(dict_dim): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1 + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 ) out = conv_net(data, dict_dim) return out @@ -125,10 +125,10 @@ class TestDistTextClassification2x2(TestDistRunnerBase): word_dict, dict_dim = get_worddict(vocab) # Input data - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1 + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') # Train program predict = conv_net(data, dict_dim) diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index 0db96bee8bba67ed0e63d32a5028c1681dbfdcf5..5b0343bd81c245d3780138146882973ec259ad10 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -1512,14 +1512,13 @@ def make_all_inputs(input_fields): """ inputs = [] for input_field in input_fields: - input_var = layers.data( + input_var = paddle.static.data( name=input_field, shape=input_descs[input_field][0], dtype=input_descs[input_field][1], lod_level=input_descs[input_field][2] if len(input_descs[input_field]) == 3 else 0, - append_batch_size=False, ) inputs.append(input_var) return inputs diff --git a/python/paddle/fluid/tests/unittests/dist_word2vec.py b/python/paddle/fluid/tests/unittests/dist_word2vec.py index 746a1f07655ab00330bda06e229ff0b28c612a35..e10131667c745fd6da97f3fb1a742d60b7fcf3df 100644 --- a/python/paddle/fluid/tests/unittests/dist_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dist_word2vec.py @@ -107,13 +107,21 @@ class TestDistWord2vec2x2(TestDistRunnerBase): word_dict = paddle.dataset.imikolov.build_dict() dict_size = len(word_dict) - first_word = fluid.layers.data(name='firstw', 
shape=[1], dtype='int64') - second_word = fluid.layers.data( - name='secondw', shape=[1], dtype='int64' + first_word = paddle.static.data( + name='firstw', shape=[-1, 1], dtype='int64' + ) + second_word = paddle.static.data( + name='secondw', shape=[-1, 1], dtype='int64' + ) + third_word = paddle.static.data( + name='thirdw', shape=[-1, 1], dtype='int64' + ) + forth_word = paddle.static.data( + name='forthw', shape=[-1, 1], dtype='int64' + ) + next_word = paddle.static.data( + name='nextw', shape=[-1, 1], dtype='int64' ) - third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') - forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') - next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') avg_cost, predict_word = __network__( [first_word, second_word, third_word, forth_word, next_word] ) diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_normal.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_normal.py index 42c69af5e6bebd9e7c23061328545c27be08f8ad..bb3fb57161a8cdfa0713e1ca1df6a01f1f7e6594 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_normal.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_normal.py @@ -24,7 +24,6 @@ from test_distribution import DistributionNumpy import paddle from paddle import fluid from paddle.distribution import Normal -from paddle.fluid import layers np.random.seed(2022) @@ -117,8 +116,8 @@ class NormalTest(unittest.TestCase): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1], dtype='float32' ) def compare_with_numpy(self, fetch_list, sample_shape=7, tolerance=1e-6): @@ -237,8 +236,8 @@ class NormalTest3(NormalTest): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) @@ -266,8 +265,8 @@ class NormalTest4(NormalTest): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) @@ -302,8 +301,8 @@ class NormalTest5(NormalTest): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float64' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float64' ) @@ -334,20 +333,20 @@ class NormalTest6(NormalTest): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_loc = layers.data( - name='loc', shape=[dims], dtype='float32' + self.static_loc = paddle.static.data( + name='loc', shape=[-1, dims], dtype='float32' ) - self.static_scale = layers.data( - name='scale', shape=[dims], dtype='float32' + self.static_scale = paddle.static.data( + name='scale', shape=[-1, dims], dtype='float32' ) - self.static_values = layers.data( - name='values', shape=[dims], 
dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) - self.static_other_loc = layers.data( - name='other_loc', shape=[dims], dtype='float32' + self.static_other_loc = paddle.static.data( + name='other_loc', shape=[-1, dims], dtype='float32' ) - self.static_other_scale = layers.data( - name='other_scale', shape=[dims], dtype='float32' + self.static_other_scale = paddle.static.data( + name='other_scale', shape=[-1, dims], dtype='float32' ) @@ -382,20 +381,20 @@ class NormalTest7(NormalTest): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_loc = layers.data( - name='loc', shape=[dims], dtype='float64' + self.static_loc = paddle.static.data( + name='loc', shape=[-1, dims], dtype='float64' ) - self.static_scale = layers.data( - name='scale', shape=[dims], dtype='float64' + self.static_scale = paddle.static.data( + name='scale', shape=[-1, dims], dtype='float64' ) - self.static_values = layers.data( - name='values', shape=[dims], dtype='float64' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float64' ) - self.static_other_loc = layers.data( - name='other_loc', shape=[dims], dtype='float64' + self.static_other_loc = paddle.static.data( + name='other_loc', shape=[-1, dims], dtype='float64' ) - self.static_other_scale = layers.data( - name='other_scale', shape=[dims], dtype='float64' + self.static_other_scale = paddle.static.data( + name='other_scale', shape=[-1, dims], dtype='float64' ) @@ -430,20 +429,20 @@ class NormalTest8(NormalTest): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_loc = layers.data( - name='loc', shape=[dims], dtype='float64' + self.static_loc = paddle.static.data( + name='loc', shape=[-1, dims], dtype='float64' ) - self.static_scale = layers.data( - name='scale', shape=[dims], dtype='float64' + self.static_scale = paddle.static.data( + name='scale', shape=[-1, dims], dtype='float64' ) - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) - self.static_other_loc = layers.data( - name='other_loc', shape=[dims], dtype='float64' + self.static_other_loc = paddle.static.data( + name='other_loc', shape=[-1, dims], dtype='float64' ) - self.static_other_scale = layers.data( - name='other_scale', shape=[dims], dtype='float64' + self.static_other_scale = paddle.static.data( + name='other_scale', shape=[-1, dims], dtype='float64' ) @@ -477,8 +476,8 @@ class NormalTest9(NormalTest): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) @@ -512,8 +511,8 @@ class NormalTest10(NormalTest): self.static_other_loc = self.other_loc_np self.static_other_scale = self.other_scale_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_uniform.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_uniform.py index 
da5500ccefb0edc65a4df7d517e35d1a932ae8b7..ca59b5118e67754ec71e020b48ea1ce601b54714 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_uniform.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_uniform.py @@ -20,7 +20,6 @@ from test_distribution import DistributionNumpy import paddle from paddle import fluid from paddle.distribution import Uniform -from paddle.fluid import layers np.random.seed(2022) @@ -88,8 +87,8 @@ class UniformTest(unittest.TestCase): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1], dtype='float32' ) def compare_with_numpy(self, fetch_list, sample_shape=7, tolerance=1e-6): @@ -170,8 +169,8 @@ class UniformTest3(UniformTest): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) @@ -188,8 +187,8 @@ class UniformTest4(UniformTest): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) @@ -211,8 +210,8 @@ class UniformTest5(UniformTest): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float64' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float64' ) @@ -232,14 +231,14 @@ class UniformTest6(UniformTest): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_low = layers.data( - name='low', shape=[dims], dtype='float32' + self.static_low = paddle.static.data( + name='low', shape=[-1, dims], dtype='float32' ) - self.static_high = layers.data( - name='high', shape=[dims], dtype='float32' + self.static_high = paddle.static.data( + name='high', shape=[-1, dims], dtype='float32' ) - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) @@ -259,14 +258,14 @@ class UniformTest7(UniformTest): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_low = layers.data( - name='low', shape=[dims], dtype='float64' + self.static_low = paddle.static.data( + name='low', shape=[-1, dims], dtype='float64' ) - self.static_high = layers.data( - name='high', shape=[dims], dtype='float64' + self.static_high = paddle.static.data( + name='high', shape=[-1, dims], dtype='float64' ) - self.static_values = layers.data( - name='values', shape=[dims], dtype='float64' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float64' ) @@ -286,14 +285,14 @@ class UniformTest8(UniformTest): def init_static_data(self, batch_size, dims): with fluid.program_guard(self.test_program): - self.static_low = layers.data( - name='low', shape=[dims], dtype='float64' + self.static_low = paddle.static.data( + name='low', shape=[-1, dims], dtype='float64' ) - self.static_high = layers.data( - 
name='high', shape=[dims], dtype='float64' + self.static_high = paddle.static.data( + name='high', shape=[-1, dims], dtype='float64' ) - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) @@ -311,8 +310,8 @@ class UniformTest9(UniformTest): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) @@ -333,8 +332,8 @@ class UniformTest10(UniformTest): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) @@ -355,8 +354,8 @@ class UniformTest11(UniformTest): self.static_low = self.low_np self.static_high = self.high_np with fluid.program_guard(self.test_program): - self.static_values = layers.data( - name='values', shape=[dims], dtype='float32' + self.static_values = paddle.static.data( + name='values', shape=[-1, dims], dtype='float32' ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py index 4ac7a3dfe4cf39076d4a550cff04466aeaacad7c..f4d59f1a1552f978460e6c23ffaf209f497b154e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py @@ -236,11 +236,11 @@ class TestBert(unittest.TestCase): self.verify_predict() def test_train_composite(self): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) static_loss, static_ppl = self.train_static( self.bert_config, self.data_reader ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) dygraph_loss, dygraph_ppl = self.train_dygraph( self.bert_config, self.data_reader ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py index 2811a348f46561423449eb9f646e750c7935e3cc..a807e1eef234048fd0860ea616a7f6aaf241da7e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cinn_prim.py @@ -47,7 +47,6 @@ class TestPrimForward(unittest.TestCase): """ def setUp(self): - core.set_prim_backward(False) paddle.seed(2022) self.x = paddle.randn([2, 4]) self.x.stop_gradient = False @@ -58,6 +57,7 @@ class TestPrimForward(unittest.TestCase): sgd = paddle.optimizer.SGD( learning_rate=0.1, parameters=net.parameters() ) + core._set_prim_forward_enabled(use_prim) if use_prim: net = apply_to_static(net, use_prim) @@ -103,12 +103,12 @@ class TestPrimForwardAndBackward(unittest.TestCase): self.x.stop_gradient = False def train(self, use_prim): - core.set_prim_backward(True) paddle.seed(2022) net = PrimeNet() sgd = paddle.optimizer.SGD( learning_rate=0.1, parameters=net.parameters() ) + core._set_prim_all_enabled(use_prim) if use_prim: net = apply_to_static(net, use_prim) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 
b195c7d342a724598bfa175c60442d3bca418048..911ca2ec9016f122b0ed16abd506fda22d4aaccd 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -427,10 +427,10 @@ class TestResnet(unittest.TestCase): ) self.verify_predict() - def test_resnet_composite(self): - core.set_prim_enabled(True) + def test_resnet_composite_backward(self): + core._set_prim_backward_enabled(True) static_loss = self.train(to_static=True) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) dygraph_loss = self.train(to_static=True) np.testing.assert_allclose( static_loss, @@ -440,65 +440,13 @@ class TestResnet(unittest.TestCase): static_loss, dygraph_loss ), ) - core.set_prim_enabled(False) - def test_in_static_mode_mkldnn(self): - fluid.set_flags({'FLAGS_use_mkldnn': True}) - try: - if paddle.fluid.core.is_compiled_with_mkldnn(): - self.resnet_helper.train(to_static=True) - finally: - fluid.set_flags({'FLAGS_use_mkldnn': False}) - - -class TestResnetPrim(unittest.TestCase): - "test prim forward + prim backward + to_static" - - def setUp(self): - self.resnet_helper = ResNetHelper() - - def train(self, to_static): - paddle.jit.enable_to_static(to_static) - return self.resnet_helper.train(to_static) - - def verify_predict(self): - image = np.random.random([1, 3, 224, 224]).astype('float32') - dy_pre = self.resnet_helper.predict_dygraph(image) - st_pre = self.resnet_helper.predict_static(image) - dy_jit_pre = self.resnet_helper.predict_dygraph_jit(image) - predictor_pre = self.resnet_helper.predict_analysis_inference(image) - np.testing.assert_allclose( - dy_pre, - st_pre, - rtol=1e-05, - err_msg='dy_pre:\n {}\n, st_pre: \n{}.'.format(dy_pre, st_pre), - ) - np.testing.assert_allclose( - dy_jit_pre, - st_pre, - rtol=1e-05, - err_msg='dy_jit_pre:\n {}\n, st_pre: \n{}.'.format( - dy_jit_pre, st_pre - ), - ) - np.testing.assert_allclose( - predictor_pre, - st_pre, - rtol=1e-05, - err_msg='predictor_pre:\n {}\n, st_pre: \n{}.'.format( - predictor_pre, st_pre - ), - ) - - def test_resnet_composite(self): + def test_resnet_composite_forward_backward(self): plat = platform.system() if plat == "Linux": - print("=================== origin resnet ===================") - core.set_prim_enabled(False) + core._set_prim_all_enabled(True) static_loss = self.train(to_static=True) - print("======= resnet with prim forward and backward =======") - core.set_prim_enabled(True) - core.set_prim_forward("debug") + core._set_prim_all_enabled(False) dygraph_loss = self.train(to_static=True) np.testing.assert_allclose( static_loss, @@ -508,10 +456,17 @@ class TestResnetPrim(unittest.TestCase): static_loss, dygraph_loss ), ) - core.set_prim_enabled(False) else: pass + def test_in_static_mode_mkldnn(self): + fluid.set_flags({'FLAGS_use_mkldnn': True}) + try: + if paddle.fluid.core.is_compiled_with_mkldnn(): + self.resnet_helper.train(to_static=True) + finally: + fluid.set_flags({'FLAGS_use_mkldnn': False}) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py index 8e6872c079cec5157eef6b7debb654ff88a43261..f0cd98c2c110bbb9592c69b3373c3da2e74f75fb 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py @@ -130,9 +130,9 @@ class TestResnet(unittest.TestCase): ) def 
test_resnet_composite(self): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) static_loss = self.train(to_static=True) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) dygraph_loss = self.train(to_static=False) np.testing.assert_allclose( static_loss, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py index 6213f6fae2415a90eaea437044df0a975f69406a..252b63a646b7a42e288cc2b16d2262de223bfdcf 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py @@ -137,9 +137,9 @@ class TestResnet(unittest.TestCase): def test_resnet_composite(self): if fluid.is_compiled_with_cuda(): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) static_loss = self.train(to_static=True) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) dygraph_loss = self.train(to_static=False) # NOTE: In pure fp16 training, loss is not stable, so we enlarge atol here. np.testing.assert_allclose( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py index 1b4d01114f8c2acd680b1bd1f232cbdafd139cfa..5bbeba860f590bd6b51930807e928cdd28225c19 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py @@ -426,9 +426,9 @@ class TestResnet(unittest.TestCase): self.verify_predict() def test_resnet_composite(self): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) static_loss = self.train(to_static=True) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) dygraph_loss = self.train(to_static=False) np.testing.assert_allclose( static_loss, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py index 92bdde2d94f9eaf29a8ed5cb16c85b62f419389c..680d8afd4860ff61e924722e4bc73a303bb42f65 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py @@ -296,12 +296,11 @@ class InputField: self.feed_list = [] for slot in input_slots: self.feed_list.append( - fluid.layers.data( + paddle.static.data( name=slot['name'], shape=slot['shape'], dtype=slot['dtype'], lod_level=slot.get('lod_level', 0), - append_batch_size=False, ) ) diff --git a/python/paddle/fluid/tests/unittests/eager_op_test.py b/python/paddle/fluid/tests/unittests/eager_op_test.py new file mode 100644 index 0000000000000000000000000000000000000000..4b02ac5684dda360012b6c0e4e596a4177f0d0a9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/eager_op_test.py @@ -0,0 +1,2552 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import os +import random +import struct +import sys +import unittest +import warnings +from collections import defaultdict +from copy import copy + +import numpy as np + +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid import unique_name +from paddle.fluid.backward import append_backward +from paddle.fluid.executor import Executor +from paddle.fluid.framework import ( + OpProtoHolder, + Program, + _current_expected_place, + _dygraph_tracer, + in_dygraph_mode, +) +from paddle.fluid.op import Operator +from paddle.jit.dy2static.utils import parse_arg_and_kwargs + +sys.path.append(os.path.abspath(os.path.dirname(__file__))) +from testsuite import append_input_output, append_loss_ops, create_op, set_input +from white_list import ( + check_shape_white_list, + compile_vs_runtime_white_list, + no_check_set_white_list, + no_grad_set_white_list, + op_accuracy_white_list, + op_threshold_white_list, +) + + +def check_out_dtype(api_fn, in_specs, expect_dtypes, target_index=0, **configs): + """ + Determines whether dtype of output tensor is as expected. + + Args: + api_fn(callable): paddle api function + in_specs(list[tuple]): list of shape and dtype information for constructing input tensor of api_fn, such as [(shape, dtype), (shape, dtype)]. + expected_dtype(list[str]): expected dtype of output tensor. + target_index(int): indicate which one from in_specs to infer the dtype of output. + config(dict): other arguments of paddle api function + + Example: + check_out_dtype(fluid.layers.pad_constant_like, [([2,3,2,3], 'float64'), ([1, 3, 1,3], )], ['float32', 'float64', 'int64'], target_index=1, pad_value=0.) 
+ + """ + paddle.enable_static() + for i, expect_dtype in enumerate(expect_dtypes): + with paddle.static.program_guard(paddle.static.Program()): + input_t = [] + for index, spec in enumerate(in_specs): + if len(spec) == 1: + shape = spec[0] + dtype = expect_dtype if target_index == index else 'float32' + elif len(spec) == 2: + shape, dtype = spec + else: + raise ValueError( + "Value of in_specs[{}] should contains two elements: [shape, dtype]".format( + index + ) + ) + input_t.append( + paddle.static.data( + name='data_%s' % index, shape=shape, dtype=dtype + ) + ) + + out = api_fn(*input_t, **configs) + out_dtype = fluid.data_feeder.convert_dtype(out.dtype) + + if out_dtype != expect_dtype: + raise ValueError( + "Expected out.dtype is {}, but got {} from {}.".format( + expect_dtype, out_dtype, api_fn.__name__ + ) + ) + + +def _set_use_system_allocator(value=None): + USE_SYSTEM_ALLOCATOR_FLAG = "FLAGS_use_system_allocator" + old_value = core.globals()[USE_SYSTEM_ALLOCATOR_FLAG] + value = old_value if value is None else value + core.globals()[USE_SYSTEM_ALLOCATOR_FLAG] = value + return old_value + + +def randomize_probability(batch_size, class_num, dtype='float32'): + prob = np.random.uniform(0.1, 1.0, size=(batch_size, class_num)).astype( + dtype + ) + prob_sum = prob.sum(axis=1) + for i in range(len(prob)): + prob[i] /= prob_sum[i] + return prob + + +def get_numeric_gradient( + place, + scope, + op, + inputs, + input_to_check, + output_names, + delta=0.005, + in_place=False, +): + # FIXME: change this method by compile time concepts + set_input(scope, op, inputs, place) + + def product(dim): + return functools.reduce(lambda a, b: a * b, dim, 1) + + tensor_to_check = scope.find_var(input_to_check).get_tensor() + tensor_size = product(tensor_to_check.shape()) + tensor_to_check_dtype = tensor_to_check._dtype() + if tensor_to_check_dtype == core.VarDesc.VarType.FP32: + tensor_to_check_dtype = np.float32 + elif tensor_to_check_dtype == core.VarDesc.VarType.FP64: + tensor_to_check_dtype = np.float64 + elif tensor_to_check_dtype == core.VarDesc.VarType.FP16: + tensor_to_check_dtype = np.float16 + # set delta as np.float16, will automatic convert to float32, float64 + delta = np.array(delta).astype(np.float16) + elif tensor_to_check_dtype == core.VarDesc.VarType.BF16: + tensor_to_check_dtype = np.float32 + elif tensor_to_check_dtype == core.VarDesc.VarType.COMPLEX64: + tensor_to_check_dtype = np.complex64 + elif tensor_to_check_dtype == core.VarDesc.VarType.COMPLEX128: + tensor_to_check_dtype = np.complex128 + else: + raise ValueError( + "Not supported data type " + + str(tensor_to_check_dtype) + + ", tensor name : " + + str(input_to_check) + ) + + def get_output(): + sum = [] + op.run(scope, place) + for output_name in output_names: + output_numpy = np.array(scope.find_var(output_name).get_tensor()) + # numpy.dtype does not have bfloat16, thus we use numpy.uint16 to + # store bfloat16 data, and need to be converted to float to check + # the floating precision. 
+ if tensor_to_check._dtype() == core.VarDesc.VarType.BF16: + output_numpy = convert_uint16_to_float(output_numpy) + sum.append(output_numpy.astype(tensor_to_check_dtype).mean()) + return tensor_to_check_dtype(np.array(sum).sum() / len(output_names)) + + gradient_flat = np.zeros(shape=(tensor_size,), dtype=tensor_to_check_dtype) + + def __get_elem__(tensor, i): + if tensor_to_check_dtype == np.float16: + numpy_tensor = np.array(tensor).astype(np.float16) + numpy_tensor = numpy_tensor.flatten() + return numpy_tensor[i] + elif tensor_to_check._dtype() == core.VarDesc.VarType.BF16: + numpy_tensor = np.array(tensor).astype(np.uint16) + numpy_tensor = numpy_tensor.flatten() + return struct.unpack( + '> 16 + + +def convert_float_to_uint16(float_list, data_format="NCHW"): + if data_format == "NHWC": + float_list = np.transpose(float_list, [0, 3, 1, 2]) + + new_output = [] + for x in np.nditer(float_list): + new_output.append(np.uint16(copy_bits_from_float_to_uint16(x))) + new_output = np.reshape(new_output, float_list.shape).view(np.uint16) + + if data_format == "NHWC": + new_output = np.transpose(new_output, [0, 2, 3, 1]) + return new_output + + +def convert_uint16_to_float(in_list): + in_list = np.asarray(in_list) + out = np.vectorize( + lambda x: struct.unpack( + ' 1 and is_np_data( + sub_val_value[1] + ): # case 3 + dtype_set.add(sub_val_value[1].dtype) + elif ( + len(sub_val_value) > 1 + and isinstance(sub_val_value[1], (list, tuple)) + and is_np_data(sub_val_value[1][0]) + ): # case 4 + dtype_set.add(sub_val_value[1][0].dtype) + + # infer dtype from inputs, and dtype means the precision of the test + # collect dtype of all inputs + input_dtype_set = set() + infer_dtype(inputs, input_dtype_set) + dtype_list = [ + np.dtype(np.float64), + np.dtype(np.float32), + np.dtype(np.float16), + np.dtype(np.int64), + np.dtype(np.int32), + np.dtype(np.uint16), + np.dtype(np.int16), + np.dtype(np.int8), + np.dtype(np.uint8), + np.dtype(np.bool_), + ] + # check the dtype in dtype_list in order, select the first dtype that in dtype_set + for dtype in dtype_list: + if dtype in input_dtype_set: + self.dtype = dtype + break + # save input dtype in class attr + self.__class__.dtype = self.dtype + + # infer dtype of outputs + output_dtype_set = set() + infer_dtype(outputs, output_dtype_set) + for dtype in dtype_list: + if dtype in output_dtype_set: + self.output_dtype = dtype + break + + def feed_var(self, input_vars, place): + feed_map = {} + for var_name in input_vars: + if isinstance(input_vars[var_name], list): + for name, np_value in self.inputs[var_name]: + tensor = core.LoDTensor() + if isinstance(np_value, tuple): + tensor.set(np_value[0], place) + tensor.set_recursive_sequence_lengths(np_value[1]) + else: + tensor.set(np_value, place) + feed_map[name] = tensor + else: + tensor = core.LoDTensor() + if isinstance(self.inputs[var_name], tuple): + tensor.set(self.inputs[var_name][0], place) + tensor.set_recursive_sequence_lengths( + self.inputs[var_name][1] + ) + else: + tensor.set(self.inputs[var_name], place) + feed_map[var_name] = tensor + + return feed_map + + def _append_ops(self, block): + self.__class__.op_type = ( + self.op_type + ) # for ci check, please not delete it for now + if self.is_mkldnn_op(): + self.__class__.use_mkldnn = True + + if self.is_xpu_op(): + self.__class__.use_xpu = True + + op_proto = OpProtoHolder.instance().get_op_proto(self.op_type) + "infer datatype from inputs and outputs for this test case" + if self.is_bfloat16_op(): + self.dtype = np.uint16 + self.__class__.dtype = 
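The uint16 helpers above pack float32 data into bfloat16 bit patterns; the core trick is that bfloat16 keeps only the upper 16 bits of an IEEE-754 float32. A NumPy-only sketch of that round trip (truncating rather than rounding, matching the shift-by-16 used above):

import numpy as np

def float32_to_bf16_bits(x):
    # Keep the high 16 bits of each float32 (sign, exponent, top 7 mantissa bits).
    x = np.ascontiguousarray(x, dtype=np.float32)
    return (x.view(np.uint32) >> np.uint32(16)).astype(np.uint16)

def bf16_bits_to_float32(bits):
    # Put the stored 16 bits back into the high half of a float32 word.
    bits = np.asarray(bits, dtype=np.uint16)
    return (bits.astype(np.uint32) << np.uint32(16)).view(np.float32)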
self.dtype + self.output_dtype = np.uint16 + else: + self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs) + inputs = append_input_output( + block, op_proto, self.inputs, True, self.dtype + ) + outputs = append_input_output( + block, op_proto, self.outputs, False, self.dtype + ) + + if hasattr(self, "cache_name_list"): + for name in self.cache_name_list: + inputs[name] = block.create_var( + name=name, + persistable=True, + type=core.VarDesc.VarType.RAW, + stop_gradient=True, + ) + + op = block.append_op( + type=self.op_type, + inputs=inputs, + outputs=outputs, + attrs=copy(self.attrs) if hasattr(self, "attrs") else dict(), + ) + # infer variable type and infer shape in compile-time + op.desc.infer_var_type(block.desc) + op.desc.infer_shape(block.desc) + + return op + + def _get_io_vars(self, block, numpy_inputs): + inputs = {} + for name, value in numpy_inputs.items(): + if isinstance(value, list): + var_list = [ + block.var(sub_name) for sub_name, sub_value in value + ] + inputs[name] = var_list + else: + inputs[name] = block.var(name) + return inputs + + def _get_inputs(self, block): + return self._get_io_vars(block, self.inputs) + + def _get_outputs(self, block): + return self._get_io_vars(block, self.outputs) + + def calc_output(self, place): + outs, _ = self._calc_output(place) + return outs + + def _create_var_from_numpy(self, value): + if isinstance(value, tuple): + data = value[0] + lod = value[1] + v = fluid.dygraph.base.to_variable(value=data) + v.value().get_tensor().set_recursive_sequence_lengths(lod) + return v + else: + return fluid.dygraph.base.to_variable(value) + + def get_sequence_batch_size_1_input(self, lod=None, shape=None): + """Get LoD input data whose batch size is 1. + All sequence related OP unittests should call this function to contain the case of batch size = 1. + Args: + lod (list[list of int], optional): Length-based LoD, length of lod[0] should be 1. Default: [[13]]. + shape (list, optional): Shape of input, shape[0] should be equals to lod[0][0]. Default: [13, 23]. + Returns: + tuple (ndarray, lod) : LoD input data whose batch size is 1. + """ + if lod is None: + lod = [[13]] + if shape is None: + shape = [13, 23] + assert len(lod[0]) == 1 + assert lod[0][0] == shape[0] + x = np.random.uniform(0.1, 1, shape).astype('float32') + return (x, lod) + + def lod_has_single_zero(self, lod): + for i in range(len(lod) - 2): + if lod[i] != 0 and lod[i + 1] == 0 and lod[i + 2] != 0: + return True + return False + + def lod_has_continuous_zero(self, lod): + for i in range(len(lod) - 3): + if ( + lod[i] != 0 + and lod[i + 1] == 0 + and lod[i + 2] == 0 + and lod[i + 3] != 0 + ): + return True + return False + + def get_sequence_instance_size_0_input(self, lod=None, shape=None): + """Get LoD input data whose instance size is 0. + All sequence related OP unittests should call this function to contain the case of instance size is 0. + Args: + lod (list[list of int], optional): Length-based LoD, lod[0]'s size must at least eight, lod[0] must at least two zeros at the beginning and at least two zeros at the end, the middle position of lod[0] contains a single zero and multiple zero. Default: [[0, 0, 4, 0, 3, 0, 0, 5, 0, 0]]. + shape (list, optional): Shape of input, shape[0] should be equals to lod[0][0]. Default: [13, 23]. + Returns: + tuple (ndarray, lod): LoD input data whose instance size is 0. 
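The sequence helpers here hand back (ndarray, lod) pairs using a length-based LoD; a short illustration of what that encoding means, using the defaults of get_sequence_batch_size_1_input:

import numpy as np

# Length-based LoD: lod[0] lists the length of each sequence in the batch.
lod = [[13]]                                               # one sequence, 13 steps
x = np.random.uniform(0.1, 1, [13, 23]).astype('float32')  # 13 steps x 23 features
sample = (x, lod)   # the (data, lod) tuple that LoD-aware op tests feed in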
+ """ + if lod is None: + lod = [[0, 0, 4, 0, 3, 0, 0, 5, 0, 0]] + if shape is None: + shape = [12, 10] + assert len(lod[0]) >= 8 + assert ( + lod[0][0] == 0 + and lod[0][1] == 0 + and lod[0][-1] == 0 + and lod[0][-2] == 0 + ) + assert self.lod_has_single_zero(lod[0]) is True + assert self.lod_has_continuous_zero(lod[0]) is True + assert sum(lod[0]) == shape[0] + + x = np.random.uniform(0.1, 1, shape).astype('float32') + return (x, lod) + + def append_input_output_for_dygraph( + self, op_proto, np_list, is_input, if_return_inputs_grad_dict, block + ): + def create_var(np_value, name, is_input, if_return_inputs_grad_dict): + np_value_temp = np_value + has_lod = False + lod_temp = None + if isinstance(np_value, tuple): + np_value_temp = np_value[0] + has_lod = True + lod_temp = np_value[1] + + if is_input: + v = self._create_var_from_numpy(np_value_temp) + + if if_return_inputs_grad_dict: + v.stop_gradient = False + if hasattr(v, "retain_grads"): + v.retain_grads() + + if has_lod: + v.value().get_tensor().set_recursive_sequence_lengths( + lod_temp + ) + else: + v = block.create_var( + name=name, + dtype=np_value_temp.dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=False, + ) + return v + + # prepare variable for input or output + var_dict = defaultdict(list) + if if_return_inputs_grad_dict: + inputs_grad_dict = defaultdict() + proto_list = op_proto.inputs if is_input else op_proto.outputs + for var_proto in proto_list: + name = var_proto.name + if (name not in np_list) and var_proto.dispensable: + continue + if name not in np_list: + assert var_proto.intermediate, "{} not found".format(name) + v = block.create_var( + dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR + ) + var_dict[name].append(v) + if if_return_inputs_grad_dict: + inputs_grad_dict[name] = v + continue + if var_proto.duplicable: + assert isinstance( + np_list[name], list + ), "Duplicable {} should be set as list".format(name) + var_list = [] + slot_name = name + for (name, np_value) in np_list[name]: + v = create_var( + np_value, name, is_input, if_return_inputs_grad_dict + ) + var_list.append(v) + if if_return_inputs_grad_dict: + inputs_grad_dict[name] = v + var_dict[slot_name] = var_list + else: + nplist_value_temp = None + name_temp = None + if isinstance(np_list[name], list): + nplist_value_temp = np_list[name][0] + name_temp = name + else: + nplist_value_temp = np_list[name] + name_temp = unique_name.generate("%s_out" % (name)) + v = create_var( + nplist_value_temp, + name_temp, + is_input, + if_return_inputs_grad_dict, + ) + var_dict[name].append(v) + if if_return_inputs_grad_dict: + inputs_grad_dict[name] = v + + if if_return_inputs_grad_dict: + return var_dict, inputs_grad_dict + else: + return var_dict + + def _check_api_outs_by_dygraph_outs(self, api_outs, dygraph_outs, place): + """for quick verify, here we take a simplest strategy: + 1. we only check variable in api_outs. + 2. we simply check the numpy (tensor) . + 3. we set atol and rtol as 1e-5, because they are unrelated to dtype. 
+ """ + for name in api_outs: + np_api = np.array(api_outs[name]) + np_dyg = np.array(dygraph_outs[name]) + np.testing.assert_allclose( + np_api, + np_dyg, + rtol=1e-05, + equal_nan=False, + err_msg='Output (' + + name + + ') has diff at ' + + str(place) + + '\nExpect ' + + str(np_dyg) + + '\n' + + 'But Got' + + str(np_api) + + ' in class ' + + self.__class__.__name__, + ) + + def _calc_python_api_output(self, place, egr_inps=None, egr_oups=None): + """set egr_inps and egr_oups = None if you want to create it by yourself.""" + + def prepare_python_api_arguments( + api, op_proto_ins, op_proto_attrs, kernel_sig + ): + """map from `op proto inputs and attrs` to `api input list and api attrs dict` + + NOTE: the op_proto_attrs and op_proto_ins is a default dict. default value is [] + """ + + class Empty: + pass + + def is_empty(a): + return isinstance(a, Empty) + + def get_default(idx, defaults): + assert not isinstance(defaults[idx], Empty), ( + "%d-th params of python api don't have default value." % idx + ) + return defaults[idx] + + def to_defaults_list(params, defaults): + return [defaults[p] for p in params if p in defaults] + + def parse_attri_value(name, op_inputs, op_attrs): + """parse true value from inputs and attrs, if there is no name passed by OpTest, return Empty + 1. if the name in op_attrs, use the op_attrs[name] + 2. if the name in op_inputs, convert the op_inputs to [type of default value] + 3. if the name not in op_attrs ans op_inputs, return Empty. (this will use the default value from python api) + """ + if name in op_proto_attrs: + return op_proto_attrs[name] + elif name in op_inputs: + if len(op_inputs[name]) == 1: + # why don't use numpy().item() : if the Tensor is float64, we will change it to python.float32, where we loss accuracy: [allclose_op] + # why we reconstruct a tensor: because we want the tensor in cpu. + return paddle.to_tensor( + op_inputs[name][0].numpy(), place='cpu' + ) + else: + # if this is a list (test_unsqueeze2_op): we just pass it into the python api. + return op_inputs[name] + else: + return Empty() + + # NOTE(xiongkun): the logic of constructing parameters: + # for example: + # python api: cumprod(x, dim, dtype=None, name=None) + # kernel sig: [["x"], ["dim"], ["out"]]" + # + # we will construct a lot of list with the same length : len == len(api_params), here is 4 + # api_params = ["x", "dim", "dtype", "name"] + # api_defaults = [Empty, Empty, None, None]; empty means no defaults. + # inputs_and_attrs = ["x", "dim"] , the length may shorter or longer than api_params + # input_arguments = [RealValue in self.inputs and self.attrs] + # then ,we will loop for the api_params, construct a result list: + # if the name in ['name', 'dtype', 'out', 'output'], we will use the default value + # else, we will consume a input_arguments. (because the name is not corresponding, so we only use the order) + + api_params, api_defaults = parse_arg_and_kwargs(api) + api_defaults = to_defaults_list(api_params, api_defaults) + api_defaults = [ + Empty() for i in range(len(api_params) - len(api_defaults)) + ] + api_defaults + assert len(api_defaults) == len( + api_params + ), "Error happens. contack xiongkun03 to solve." 
+ inputs_sig, attrs_sig, outputs_sig = kernel_sig + inputs_and_attrs = inputs_sig + attrs_sig + input_arguments = [ + op_proto_ins.get(name, Empty()) for name in inputs_sig + ] + [ + parse_attri_value(name, op_proto_ins, op_proto_attrs) + for name in attrs_sig + ] + results = [] + api_ignore_param_list = set(['name', 'dtype', 'out', 'output']) + idx_of_op_proto_arguments = 0 + for idx, arg_name in enumerate(api_params): + if arg_name in api_ignore_param_list: + results.append(get_default(idx, api_defaults)) + else: + if idx_of_op_proto_arguments < len(input_arguments): + tmp = input_arguments[idx_of_op_proto_arguments] + idx_of_op_proto_arguments += 1 + else: + tmp = Empty() # use the default value + + if isinstance(tmp, Empty): + results.append(get_default(idx, api_defaults)) + else: + results.append(tmp) + assert len(results) == len(api_params) + return results + + def construct_output_dict_by_kernel_sig(ret_tuple, output_sig): + if hasattr(self, "python_out_sig"): + output_sig = self.python_out_sig + if not isinstance(ret_tuple, (tuple, list)): + ret_tuple = [ret_tuple] + if len(output_sig) == len(ret_tuple): + # [assumption]: we assume {"Out": [Tensor]} + return {a: [b] for a, b in zip(output_sig, ret_tuple)} + else: + # [assumption]: return multi-Tensor in a single output. such as paddle.split() + assert ( + len(output_sig) == 1 + ), "Don't support multi-output with multi-tensor output. (May be you can use set `python_out_sig`, see `test_squeeze2_op` as a example.)" + return {output_sig[0]: ret_tuple} + + def assumption_assert_and_transform(args, inp_num): + """ + transform inputs by the following rules: + 1. [Tensor] -> Tensor + 2. [Tensor, Tensor, ...] -> list of Tensors + 3. None -> None + 4. Others: raise Error + + only support "X" is list of Tensor, currently don't support other structure like dict. + """ + inp_args = [ + [inp] if inp is None else inp for inp in args[:inp_num] + ] # convert None -> [None] + for inp in inp_args: + assert isinstance( + inp, list + ), "currently only support `X` is [Tensor], don't support other structure." + args = [ + inp[0] if len(inp) == 1 else inp for inp in inp_args + ] + args[inp_num:] + return args + + def _get_kernel_signature( + dygraph_tensor_inputs, dygraph_tensor_outputs, attrs_outputs + ): + try: + kernel_sig = _dygraph_tracer()._get_kernel_signature( + self.op_type, + dygraph_tensor_inputs, + dygraph_tensor_outputs, + attrs_outputs, + ) + except RuntimeError as re: + """we think the kernel_sig is missing.""" + kernel_sig = None + print( + "[Warning: op_test.py] Kernel Signature is not found for %s, fall back to intermediate state." 
+ % self.op_type + ) + return kernel_sig + + def cal_python_api(python_api, args, kernel_sig): + inputs_sig, attrs_sig, outputs_sig = kernel_sig + args = assumption_assert_and_transform(args, len(inputs_sig)) + ret_tuple = python_api(*args) + return construct_output_dict_by_kernel_sig(ret_tuple, outputs_sig) + + with fluid.dygraph.base.guard(place=place): + block = fluid.default_main_program().global_block() + op_proto = OpProtoHolder.instance().get_op_proto(self.op_type) + # prepare input variable + dygraph_tensor_inputs = ( + egr_inps + if egr_inps + else self.append_input_output_for_dygraph( + op_proto, self.inputs, True, False, block + ) + ) + # prepare output variable + dygraph_tensor_outputs = ( + egr_oups + if egr_oups + else self.append_input_output_for_dygraph( + op_proto, self.outputs, False, False, block + ) + ) + + # prepare attributes + attrs_outputs = {} + if hasattr(self, "attrs"): + for attrs_name in self.attrs: + if self.attrs[attrs_name] is not None: + attrs_outputs[attrs_name] = self.attrs[attrs_name] + + kernel_sig = _get_kernel_signature( + dygraph_tensor_inputs, dygraph_tensor_outputs, attrs_outputs + ) + if not kernel_sig: + return None + assert hasattr(self, "python_api"), ( + "Detect there is KernelSignature for `%s` op, please set the `self.python_api` if you set check_dygraph = True" + % self.op_type + ) + args = prepare_python_api_arguments( + self.python_api, + dygraph_tensor_inputs, + attrs_outputs, + kernel_sig, + ) + """ we directly return the cal_python_api value because the value is already tensor. + """ + return cal_python_api(self.python_api, args, kernel_sig) + + def _calc_dygraph_output(self, place, parallel=False, no_check_set=None): + self.__class__.op_type = ( + self.op_type + ) # for ci check, please not delete it for now + with fluid.dygraph.base.guard(place=place): + block = fluid.default_main_program().global_block() + + op_proto = OpProtoHolder.instance().get_op_proto(self.op_type) + + # prepare input variable + inputs = self.append_input_output_for_dygraph( + op_proto, self.inputs, True, False, block + ) + # prepare output variable + outputs = self.append_input_output_for_dygraph( + op_proto, self.outputs, False, False, block + ) + + # prepare attributes + attrs_outputs = {} + if hasattr(self, "attrs"): + for attrs_name in self.attrs: + if self.attrs[attrs_name] is not None: + attrs_outputs[attrs_name] = self.attrs[attrs_name] + + block.append_op( + type=self.op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs_outputs if hasattr(self, "attrs") else None, + ) + return outputs + + def _calc_output( + self, + place, + parallel=False, + no_check_set=None, + loss=None, + enable_inplace=None, + for_inplace_test=None, + ): + program = Program() + block = program.global_block() + op = self._append_ops(block) + + inputs = self._get_inputs(block) + outputs = self._get_outputs(block) + feed_map = self.feed_var(inputs, place) + + if for_inplace_test: + # Some variables' tensors hold no buffer (tensor's _holder is NULL), like XShape in reshape2 op, + # and the shapes of those variables contain 0 (eg. Xshape.shape = [0, 2, 5]). + # Set persistable for those variables in order to get them from global_scope for inplace grad test directly other than feed them, + # since feed op calls check_memory_size() which fails when tensor's holder_ is NULL. 
+ for out_name in op.output_arg_names: + var = block.var(out_name) + if 0 in var.shape: + var.persistable = True + original_program = program + if parallel: + use_cuda = False + if isinstance(place, fluid.CUDAPlace): + use_cuda = True + compiled_prog = fluid.CompiledProgram(program).with_data_parallel( + loss_name=loss.name if loss else None, places=place + ) + program = compiled_prog + fetch_list = getattr(self, "fetch_list", []) + # if the fetch_list is customized by user, we use it directly. + # if not, fill the fetch_list by the user configured outputs in test. + if len(fetch_list) == 0: + for var_name, var in outputs.items(): + if no_check_set is not None and var_name in no_check_set: + continue + if isinstance(var, list): + for v in var: + fetch_list.append(v.name) + else: + fetch_list.append(var.name) + # if the fetch_list still empty, fill the fetch_list by the operator output. + if len(fetch_list) == 0: + for out_name, out_dup in Operator.get_op_outputs(self.op_type): + fetch_list.append(str(out_name)) + + if enable_inplace is not None: + build_strategy = fluid.BuildStrategy() + build_strategy.enable_inplace = enable_inplace + + compiled_prog = fluid.CompiledProgram(program).with_data_parallel( + build_strategy=build_strategy, places=place + ) + program = compiled_prog + + executor = Executor(place) + outs = executor.run( + program, feed=feed_map, fetch_list=fetch_list, return_numpy=False + ) + self.op = op + self.program = original_program + if for_inplace_test: + return outs, fetch_list, feed_map, original_program, op.desc + else: + return outs, fetch_list + + def _compare_expect_and_actual_outputs( + self, place, fetch_list, expect_outs, actual_outs, inplace_atol=None + ): + """Compare expect outs and actual outs of an tested op. + + Args: + place (CPUPlace | CUDAPlace): The place where the op runs. + fetch_list (list): The outputs of tested op. + expect_outs (list): The expect outs of tested op. + actual_outs (list): The actual outs of tested op. + inplace_atol (float): The tolerable error, only set when tested op doesn't ensure computational consistency, like group_norm op. + + Returns: + None. + """ + # compare expect_outs and actual_outs + for i, name in enumerate(fetch_list): + # Note(zhiqiu): inplace_atol should be only set when op doesn't ensure + # computational consistency. + # When inplace_atol is not None, the inplace check uses numpy.allclose + # to check inplace result instead of numpy.array_equal. + expect_out = np.array(expect_outs[i]) + actual_out = np.array(actual_outs[i]) + if inplace_atol is not None: + np.testing.assert_allclose( + expect_out, + actual_out, + rtol=1e-05, + atol=inplace_atol, + err_msg='Output (' + + name + + ') has diff at ' + + str(place) + + ' when using and not using inplace' + + '\nExpect ' + + str(expect_out) + + '\n' + + 'But Got' + + str(actual_out) + + ' in class ' + + self.__class__.__name__, + ) + else: + np.testing.assert_array_equal( + expect_out, + actual_out, + err_msg='Output (' + + name + + ') has diff at ' + + str(place) + + ' when using and not using inplace' + + '\nExpect ' + + str(expect_out) + + '\n' + + 'But Got' + + str(actual_out) + + ' in class ' + + self.__class__.__name__ + + '\n', + ) + + def _construct_grad_program_from_forward( + self, fwd_program, grad_op_desc, op_grad_to_var + ): + """Generate grad_program which contains the grad_op. + + Args: + fwd_program (tuple): The program that contains grad_op_desc's corresponding forward op. + grad_op_desc (OpDesc): The OpDesc of grad op. 
+            op_grad_to_var (dict): The relation of variables in grad op and its forward op.
+
+        Returns:
+            grad_program (program): The program which contains the grad_op.
+        """
+        grad_program = Program()
+        grad_block = grad_program.global_block()
+        new_op_desc = grad_block.desc.append_op()
+        new_op_desc.copy_from(grad_op_desc)
+        grad_program._sync_with_cpp()
+
+        # Create grad vars based on fwd vars (shape and dtype)
+        for arg in (
+            grad_op_desc.input_arg_names() + grad_op_desc.output_arg_names()
+        ):
+            fwd_var_name = op_grad_to_var.get(arg, None)
+            if fwd_var_name is None:
+                fwd_var_name = arg
+            fwd_var = fwd_program.global_block().vars.get(fwd_var_name)
+            assert fwd_var is not None, "{} cannot be found".format(
+                fwd_var_name
+            )
+            grad_var = grad_block.create_var(
+                name=arg,
+                dtype=fwd_var.dtype,
+                shape=fwd_var.shape,
+                type=fwd_var.type,
+                persistable=False,
+            )
+
+            # Some variables' tensors hold no buffer (tensor's _holder is NULL), like XShape in reshape2 op,
+            # and the shapes of those variables contain 0 (eg. Xshape.shape = [0, 2, 5]).
+            # Set persistable for those variables in order to get them from global_scope for inplace grad test directly rather than feeding them,
+            # since feed op calls check_memory_size() which fails when tensor's holder_ is NULL.
+            if 0 in grad_var.shape:
+                grad_var.persistable = True
+        grad_program._sync_with_cpp()
+        return grad_program
+
+    def _construct_grad_feed_map_from_forward(
+        self, place, fwd_res, grad_op_desc, op_grad_to_var
+    ):
+        """Generate grad_feed_map for grad_program.
+
+        Since we don't really check gradient accuracy, but check the consistency when using and not using inplace,
+        we use fwd outs (also inputs sometimes) to construct grad inputs.
+
+        Args:
+            place (CPUPlace | CUDAPlace): The place where the op runs.
+            fwd_res (tuple): The outputs of its forward op, in the same form as returns of _calc_outputs() when for_inplace_test is True.
+                i.e., tuple(fwd_outs, fwd_fetch_list, fwd_feed_map, fwd_program, fwd_op_desc)
+            grad_op_desc (OpDesc): The OpDesc of grad op.
+            op_grad_to_var (dict): The relation of variables in grad op and its fwd_op.
+
+        Returns:
+            grad_feed_map (dict): The feed_map of grad_op.
+        """
+        (
+            fwd_outs,
+            fwd_fetch_list,
+            fwd_feed_map,
+            fwd_program,
+            fwd_op_desc,
+        ) = fwd_res
+        p = core.Place()
+        p.set_place(place)
+        grad_feed_map = {}
+        for arg in grad_op_desc.input_arg_names():
+            if arg in fwd_feed_map.keys():
+                grad_feed_map[arg] = fwd_feed_map[arg]._copy(p)
+            else:
+                fwd_var_name = op_grad_to_var.get(arg, None)
+                if fwd_var_name is None:
+                    fwd_var_name = arg
+
+                for i, out_name in enumerate(fwd_fetch_list):
+                    if out_name == fwd_var_name:
+                        # don't feed variables whose tensors hold no buffer (shape contains 0 like shape = [0,2,5] and holder_ is NULL), like XShape in reshape2 op.
+                        # get them from global_scope directly since we have set them persistable in fwd execution
+                        if 0 in fwd_program.global_block().var(out_name).shape:
+                            continue
+                        else:
+                            grad_feed_map[arg] = fwd_outs[i]._copy(p)
+
+        return grad_feed_map
+
+    def _get_need_run_ops(self, op_desc, fwd_op_desc=None):
+        """Postorder traversal of the 'grad' tree to get all ops that need to run during inplace test.
+        An op needs to run during inplace check if,
+        (1) it has infer_inplace,
+        (2) it has infer_inplace in its grad descendants. (since we need its outputs to construct its grad's inputs)
+
+        Args:
+            op_desc (OpDesc): The op_desc of current op.
+            fwd_op_desc (OpDesc): The op_desc of current op's forward op, None if current op has no forward op.
+                Eg.
relu's fwd_op is None, relu_grad's fwd_op is relu, relu_grad_grad's fwd_op is relu_grad, etc. + + Returns: + need_run_ops (list[(op_desc, fwd_op_desc)]): The ops that need to run during inplace test. + """ + need_run_ops = [] + visited_ops = [] + + def _dfs_grad_op(op_desc, fwd_op_desc=None): + visited_ops.append(op_desc.type()) + has_infer_inplace = fluid.core.has_infer_inplace(op_desc.type()) + has_grad_op_maker = fluid.core.has_grad_op_maker(op_desc.type()) + has_infer_inplace_in_grad_descendants = False + if not has_grad_op_maker: + has_infer_inplace_in_descendants = False + else: + # get grad_op_desc + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + op_desc, set(), [] + ) + if not grad_op_desc_list: + has_infer_inplace_in_grad_descendants = False + else: + for i, grad_op_desc in enumerate(grad_op_desc_list): + if ( + grad_op_desc.type() not in visited_ops + and _dfs_grad_op(grad_op_desc, fwd_op_desc=op_desc) + ): + has_infer_inplace_in_grad_descendants = True + if has_infer_inplace or has_infer_inplace_in_grad_descendants: + need_run_ops.append((op_desc, fwd_op_desc)) + return True + else: + return False + + _dfs_grad_op(op_desc, fwd_op_desc=fwd_op_desc) + return need_run_ops + + def _check_forward_inplace( + self, place, no_check_set=None, inplace_atol=None + ): + """Check the inplace correctness of given op (self.op_type). + Run the op twice with same inputs, one enable inplace and another disable, compare their outputs. + + Args: + place (CPUPlace | CUDAPlace): The place where the op runs. + no_check_set (list): The names of outputs that needn't check, like XShape of reshape op. + inplace_atol (float): The tolerable error, only set when op doesn't ensure computational consistency, like group_norm op. + + Returns: + expect_res (tuple(outs, fetch_list, feed_map, program, op_desc)): The results of given op. + We return this to construct grad_program and grad_feed_map for grad inplace check. + """ + # _calc_output() returns in the form tuple(outs, fetch_list, feed_map, program, op_desc) when for_inplace_test=True. + expect_res = self._calc_output( + place, + no_check_set=no_check_set, + enable_inplace=False, + for_inplace_test=True, + ) + actual_res = self._calc_output( + place, + no_check_set=no_check_set, + enable_inplace=True, + for_inplace_test=True, + ) + # compare expect_outs and actual_outs + self._compare_expect_and_actual_outputs( + place, + expect_res[1], + expect_res[0], + actual_res[0], + inplace_atol=inplace_atol, + ) + return expect_res + + def _calc_grad_output( + self, place, fwd_res, grad_op_desc, enable_inplace=None + ): + """Calculate grad_output for given grad_op_desc. + + since we don`t really check gradient accuracy, but check the consistency when using and not using inplace, + we use fwd outs (also inputs sometimes) to construct grad inputs. + + Args: + place (CPUPlace | CUDAPlace): The place where the op runs. + fwd_res (tuple): The outputs of its forward op, in the same form as returns of _calc_outputs() when for_inplace_test is True. + i.e., tuple(fwd_outs, fwd_fetch_list, fwd_feed_map, fwd_program, fwd_op_desc). + grad_op_desc (OpDesc): The OpDesc of grad op. + enable_inplace (bool): Enable inplace or not. + + Returns: + res (tuple(outs, fetch_list, feed_map, program, op_desc)): The results of given grad_op_desc. 
+ """ + ( + fwd_outs, + fwd_fetch_list, + fwd_feed_map, + fwd_program, + fwd_op_desc, + ) = fwd_res + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( + fwd_op_desc, set(), [] + ) + grad_program = self._construct_grad_program_from_forward( + fwd_program, grad_op_desc, op_grad_to_var + ) + grad_feed_map = self._construct_grad_feed_map_from_forward( + place, fwd_res, grad_op_desc, op_grad_to_var + ) + grad_fetch_list = grad_op_desc.output_arg_names() + exe = Executor(place) + program = grad_program + if enable_inplace is not None: + build_strategy = fluid.BuildStrategy() + build_strategy.enable_inplace = enable_inplace + compiled_program = fluid.CompiledProgram( + grad_program + ).with_data_parallel( + loss_name="", build_strategy=build_strategy, places=place + ) + program = compiled_program + + outs = exe.run( + program, + feed=grad_feed_map, + fetch_list=grad_fetch_list, + return_numpy=False, + ) + return outs, grad_fetch_list, grad_feed_map, grad_program, grad_op_desc + + def _check_grad_inplace( + self, place, fwd_res, grad_op_desc, inplace_atol=None + ): + """Check the inplace correctness of given grad_op_desc. + + Run the grad op twice with same inputs, one enable inplace and another disable, compare their outputs. + It works like _check_forward_inplace, but the way to construct program and feed_map differs. + So we define a new function for grad, grad_grad, etc. + + Args: + place (CPUPlace | CUDAPlace): The place where the op runs. + fwd_res (tuple): The outputs of its forward op, in the same form as returns of _calc_outputs() when for_inplace_test is True. + i.e., tuple(fwd_outs, fwd_fetch_list, fwd_feed_map, fwd_program, fwd_op_desc). + grad_op_desc (OpDesc): The OpDesc of grad op. + inplace_atol (float): The tolerable error, only set when op doesn't ensure computational consistency, like group_norm op. + + Returns: + expect_res (tuple(outs, fetch_list, feed_map, program, op_desc)): The results of given op. + We return this to construct grad_program and grad_feed_map for grad inplace check. + """ + expect_res = self._calc_grad_output( + place, fwd_res, grad_op_desc, enable_inplace=False + ) + actual_res = self._calc_grad_output( + place, fwd_res, grad_op_desc, enable_inplace=True + ) + + self._compare_expect_and_actual_outputs( + place, + expect_res[1], + expect_res[0], + actual_res[0], + inplace_atol=inplace_atol, + ) + return expect_res + + def check_inplace_output_with_place( + self, place, no_check_set=None, inplace_atol=None + ): + """Chech the inplace correctness of given op, its grad op, its grad_grad op, etc. + + (1) Get all ops need to run. (see conditions in _get_need_run_ops()) + (2) Run op in need_run_ops, and do inplace check if it has infer_inplace. + + Args: + place (CPUPlace | CUDAPlace): The place where the op runs. + no_check_set (list): The names of outputs that needn't check, like XShape of reshape op. + inplace_atol (float): The tolerable error, only set when op doesn't ensure computational consistency, like group_norm op. 
+ + Returns: + None + """ + if getattr(self, "no_need_check_inplace", False): + return + + has_infer_inplace = fluid.core.has_infer_inplace(self.op_type) + has_grad_op_maker = fluid.core.has_grad_op_maker(self.op_type) + + fwd_res = self._calc_output( + place, no_check_set=no_check_set, for_inplace_test=True + ) + op_desc = fwd_res[4] + need_run_ops = self._get_need_run_ops(op_desc) + + res = {} + if hasattr(self, 'attrs') and bool(self.attrs.get('use_xpu', False)): + return + for op_desc, father_op_desc in reversed(need_run_ops): + # The first one is the forward op + has_infer_inplace = fluid.core.has_infer_inplace(op_desc.type()) + if op_desc.type() == self.op_type: + if has_infer_inplace: + res[op_desc] = self._check_forward_inplace( + place, + no_check_set=no_check_set, + inplace_atol=inplace_atol, + ) + else: + res[op_desc] = self._calc_output( + place, no_check_set=no_check_set, for_inplace_test=True + ) + else: + # TODO(zhiqiu): enhance inplace_grad test for ops (sum and activation) using mkldnn + # skip op that use_mkldnn currently + flags_use_mkldnn = fluid.core.globals()["FLAGS_use_mkldnn"] + attrs_use_mkldnn = hasattr(self, 'attrs') and bool( + self.attrs.get('use_mkldnn', False) + ) + if flags_use_mkldnn or attrs_use_mkldnn: + warnings.warn( + "check inplace_grad for ops using mkldnn is not supported" + ) + continue + if has_infer_inplace: + fwd_res = res[father_op_desc] + res[op_desc] = self._check_grad_inplace( + place, fwd_res, op_desc, inplace_atol=inplace_atol + ) + else: + res[op_desc] = self._calc_grad_output( + place, fwd_res, op_desc + ) + + def check_output_with_place( + self, + place, + atol=0, + no_check_set=None, + equal_nan=False, + check_dygraph=True, + inplace_atol=None, + ): + def find_imperative_actual(target_name, dygraph_outs, place): + for name in dygraph_outs: + if name == target_name: + return dygraph_outs[name][0] + var_list = dygraph_outs[name] + for i, var in enumerate(var_list): + if var.name == target_name: + return dygraph_outs[name][i] + self.assertTrue( + False, + "Found failed {} {}".format(dygraph_outs.keys(), target_name), + ) + + def find_actual(target_name, fetch_list): + found = [ + i + for i, var_name in enumerate(fetch_list) + if var_name == target_name + ] + self.assertTrue( + len(found) == 1, "Found {} {}".format(len(found), target_name) + ) + return found[0] + + class Checker: + """base class for check with self.outputs. + currently don't support check between checkers. + """ + + def __init__(self, op_test, expect_dict): + """expect_dict is the self.outputs + support : {str: [numpy]} and {str: [(str, numpy), (str, numpy)]} + """ + self.expects = expect_dict + self.checker_name = "checker" + self.op_test = op_test # stop the op_test object. + self.op_type = op_test.op_type + + def init(self): + pass + + def convert_uint16_to_float(self, actual_np, expect_np): + raise NotImplementedError("base class, not implement!") + + def calculate_output(self): + """ + judge whether convert current output and expect to uint16. 
+ return True | False + """ + + def _is_skip_name(self, name): + if name not in self.expects: + return True + if no_check_set is not None and name in no_check_set: + return True + return False + + def find_actual_value(self, name): + """return: (actual_tensor(var_base), actual_numpy)""" + raise NotImplementedError("base class, not implement!") + + def _compare_numpy(self, name, actual_np, expect_np): + self.op_test.assertTrue( + np.allclose( + actual_np, + expect_np, + atol=atol, + rtol=self.rtol if hasattr(self, 'rtol') else 1e-5, + equal_nan=equal_nan, + ), + "Output (" + + name + + ") has diff at " + + str(place) + + " in " + + self.checker_name, + ) + + def _compare_list(self, name, actual, expect): + """if expect is a tuple, we need to compare list.""" + raise NotImplementedError("base class, not implement!") + + def compare_single_output_with_expect(self, name, expect): + actual, actual_np = self.find_actual_value(name) + expect_np = expect[0] if isinstance(expect, tuple) else expect + actual_np, expect_np = self.convert_uint16_to_float_ifneed( + actual_np, expect_np + ) + # NOTE(zhiqiu): np.allclose([], [1.]) returns True + # see details: https://stackoverflow.com/questions/38331703/why-does-numpys-broadcasting-sometimes-allow-comparing-arrays-of-different-leng + if expect_np.size == 0: + self.op_test.assertTrue(actual_np.size == 0) + self._compare_numpy(name, actual_np, expect_np) + if isinstance(expect, tuple): + self._compare_list(name, actual, expect) + + def compare_outputs_with_expects(self): + for out_name, out_dup in Operator.get_op_outputs(self.op_type): + if self._is_skip_name(out_name): + continue + if out_dup: + # if self.output = {'name': [(subname, Tensor), (subname, Tensor)]} + sub_out = self.expects[out_name] + if not isinstance(sub_out, list): + raise AssertionError( + "sub_out type %s is not list", type(sub_out) + ) + for item in sub_out: + sub_out_name, expect = item[0], item[1] + self.compare_single_output_with_expect( + sub_out_name, expect + ) + else: + expect = self.expects[out_name] + self.compare_single_output_with_expect(out_name, expect) + + def check(self): + """ + return None means ok, raise Error means failed. + + the main enter point of Checker class + """ + self.init() + self.calculate_output() + self.compare_outputs_with_expects() + + class StaticChecker(Checker): + def init(self): + self.checker_name = "static checker" + + def calculate_output(self): + outs, fetch_list = self.op_test._calc_output( + place, no_check_set=no_check_set + ) + self.outputs = outs + self.fetch_list = fetch_list + + def find_actual_value(self, name): + idx = find_actual(name, self.fetch_list) + actual = self.outputs[idx] + actual_t = np.array(actual) + return actual, actual_t + + def convert_uint16_to_float_ifneed(self, actual_np, expect_np): + """ + judge whether convert current output and expect to uint16. 
+ return True | False + """ + if actual_np.dtype == np.uint16 and expect_np.dtype in [ + np.float32, + np.float64, + ]: + actual_np = convert_uint16_to_float(actual_np) + self.rtol = 1.0e-2 + else: + self.rtol = 1.0e-5 + if ( + expect_np.dtype == np.uint16 + and actual_np.dtype == np.uint16 + ): + nonlocal atol + expect_np = convert_uint16_to_float(expect_np) + actual_np = convert_uint16_to_float(actual_np) + atol = max(atol, 0.03) + return actual_np, expect_np + + def _compare_list(self, name, actual, expect): + """if expect is a tuple, we need to compare list.""" + self.op_test.assertListEqual( + actual.recursive_sequence_lengths(), + expect[1], + "Output (" + name + ") has different lod at " + str(place), + ) + + class DygraphChecker(Checker): + def init(self): + self.checker_name = "dygraph checker" + + def calculate_output(self): + # we only check end2end api when check_dygraph=True + self.is_python_api_test = True + dygraph_outs = self.op_test._calc_python_api_output(place) + if dygraph_outs is None: + self.is_python_api_test = False + # missing KernelSignature, fall back to eager middle output. + dygraph_outs = self.op_test._calc_dygraph_output( + place, no_check_set=no_check_set + ) + self.outputs = dygraph_outs + + def _compare_numpy(self, name, actual_np, expect_np): + if ( + functools.reduce(lambda x, y: x * y, actual_np.shape, 1) + == 0 + and functools.reduce(lambda x, y: x * y, expect_np.shape, 1) + == 0 + ): + pass + else: + self.op_test.assertTrue( + np.allclose( + actual_np, + expect_np, + atol=atol, + rtol=self.rtol if hasattr(self, 'rtol') else 1e-5, + equal_nan=equal_nan, + ), + "Output (" + + name + + ") has diff at " + + str(place) + + " in " + + self.checker_name, + ) + + def convert_uint16_to_float_ifneed(self, actual_np, expect_np): + if actual_np.dtype == np.uint16 and expect_np.dtype in [ + np.float32, + np.float64, + ]: + self.rtol = 1.0e-2 + else: + self.rtol = 1.0e-5 + if self.op_test.is_bfloat16_op(): + if actual_np.dtype == np.uint16: + actual_np = convert_uint16_to_float(actual_np) + if expect_np.dtype == np.uint16: + expect_np = convert_uint16_to_float(expect_np) + return actual_np, expect_np + + def find_actual_value(self, name): + with fluid.dygraph.base.guard(place=place): + imperative_actual = find_imperative_actual( + name, self.outputs, place + ) + imperative_actual_t = np.array( + imperative_actual.value().get_tensor() + ) + return imperative_actual, imperative_actual_t + + def _compare_list(self, name, actual, expect): + """if expect is a tuple, we need to compare list.""" + with fluid.dygraph.base.guard(place=place): + self.op_test.assertListEqual( + actual.value() + .get_tensor() + .recursive_sequence_lengths(), + expect[1], + "Output (" + + name + + ") has different lod at " + + str(place) + + " in dygraph mode", + ) + + def _is_skip_name(self, name): + # if in final state and kernel signature don't have name, then skip it. + if ( + self.is_python_api_test + and hasattr(self.op_test, "python_out_sig") + and name not in self.op_test.python_out_sig + ): + return True + return super()._is_skip_name(name) + + # set some flags by the combination of arguments. 
+ self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs) + if ( + self.dtype == np.float64 + and self.op_type + not in op_threshold_white_list.NEED_FIX_FP64_CHECK_OUTPUT_THRESHOLD_OP_LIST + ): + atol = 0 + + if self.is_bfloat16_op(): + if self.is_mkldnn_op(): + check_dygraph = False + + if hasattr(self, 'force_fp32_output') and getattr( + self, 'force_fp32_output' + ): + atol = 1e-2 + else: + atol = 2 + else: + atol = 1e-1 + + if no_check_set is not None: + if ( + self.op_type + not in no_check_set_white_list.no_check_set_white_list + ): + raise AssertionError( + "no_check_set of op %s must be set to None." % self.op_type + ) + static_checker = StaticChecker(self, self.outputs) + static_checker.check() + outs, fetch_list = static_checker.outputs, static_checker.fetch_list + if check_dygraph: + dygraph_checker = DygraphChecker(self, self.outputs) + dygraph_checker.check() + dygraph_dygraph_outs = dygraph_checker.outputs + + # Note(zhiqiu): inplace_atol should be only set when op doesn't ensure + # computational consistency. + # For example, group_norm uses AtomicAdd on CUDAPlace, which do not ensure + # computation order when multiple threads write the same address. So the + # result of group_norm is non-deterministic when datatype is float. + # When inplace_atol is not None, the inplace check uses numpy.allclose + # to check inplace result instead of numpy.array_equal. + if inplace_atol is not None: + warnings.warn( + "inplace_atol should only be set when op doesn't ensure computational consistency, please check it!" + ) + # Check inplace for given op, its grad op, its grad_grad op, etc. + # No effect on original OpTest + # Currently not support ParallelExecutor on XPUPlace. + if ( + not paddle.is_compiled_with_xpu() + and not paddle.is_compiled_with_npu() + and not paddle.is_compiled_with_mlu() + and not isinstance(place, core.CustomPlace) + ): + self.check_inplace_output_with_place( + place, no_check_set=no_check_set, inplace_atol=inplace_atol + ) + + if check_dygraph: + return outs, dygraph_dygraph_outs, fetch_list + else: + return outs, fetch_list + + def check_compile_vs_runtime(self, fetch_list, fetch_outs): + def find_fetch_index(target_name, fetch_list): + found = [ + i + for i, var_name in enumerate(fetch_list) + if var_name == target_name + ] + if len(found) == 0: + return -1 + else: + self.assertTrue( + len(found) == 1, + "Found {} {}".format(len(found), target_name), + ) + return found[0] + + for name in self.op.desc.output_names(): + var_names = self.op.desc.output(name) + for var_name in var_names: + i = find_fetch_index(var_name, fetch_list) + if i == -1: + # The output is dispensiable or intermediate. + break + out = fetch_outs[i] + if isinstance(out, core.LoDTensor): + lod_level_runtime = len(out.lod()) + else: + if isinstance(out, core.LoDTensorArray): + warnings.warn( + "The check of LoDTensorArray's lod_level is not implemented now!" 
+ ) + lod_level_runtime = 0 + + var = self.program.global_block().var(var_name) + if var.type == core.VarDesc.VarType.LOD_TENSOR: + lod_level_compile = var.lod_level + else: + lod_level_compile = 0 + self.assertEqual( + lod_level_compile, + lod_level_runtime, + "The lod_level of Output (" + + name + + ") is different between compile-time and runtime (" + + str(lod_level_compile) + + " vs " + + str(lod_level_runtime) + + ")", + ) + + def _get_places(self): + if self.dtype == np.float16: + if core.is_compiled_with_cuda() and core.op_support_gpu( + self.op_type + ): + place = core.CUDAPlace(0) + if core.is_float16_supported(place): + return [place] + else: + return [] + else: + return [] + places = [fluid.CPUPlace()] + cpu_only = self._cpu_only if hasattr(self, '_cpu_only') else False + if ( + core.is_compiled_with_cuda() + and core.op_support_gpu(self.op_type) + and not cpu_only + ): + places.append(core.CUDAPlace(0)) + return places + + def check_output( + self, + atol=1e-5, + no_check_set=None, + equal_nan=False, + check_dygraph=True, + inplace_atol=None, + ): + + self.__class__.op_type = self.op_type + if self.is_mkldnn_op(): + self.__class__.use_mkldnn = True + + if self.is_xpu_op(): + self.__class__.use_xpu = True + + places = self._get_places() + for place in places: + res = self.check_output_with_place( + place, + atol, + no_check_set, + equal_nan, + check_dygraph=check_dygraph, + inplace_atol=inplace_atol, + ) + if check_dygraph: + outs, dygraph_dygraph_outs, fetch_list = res + else: + outs, fetch_list = res + if ( + self.op_type + not in compile_vs_runtime_white_list.COMPILE_RUN_OP_WHITE_LIST + ): + self.check_compile_vs_runtime(fetch_list, outs) + + def check_output_customized(self, checker, custom_place=None): + places = self._get_places() + if custom_place: + places.append(custom_place) + for place in places: + outs = self.calc_output(place) + outs = [np.array(out) for out in outs] + outs.sort(key=len) + checker(outs) + + def check_output_with_place_customized(self, checker, place): + outs = self.calc_output(place) + outs = [np.array(out) for out in outs] + outs.sort(key=len) + checker(outs) + + def _assert_is_close( + self, + numeric_grads, + analytic_grads, + names, + max_relative_error, + msg_prefix, + ): + for a, b, name in zip(numeric_grads, analytic_grads, names): + # It asserts np.abs(a - b) / np.abs(a) < max_relative_error, in which + # max_relative_error is 1e-7. According to the value of np.abs(a), we + # change np.abs(a) to achieve dynamic threshold. For example, if + # the value of np.abs(a) is between 1e-10 and 1e-8, we set np.abs(a)*=1e4. + # Therefore, it asserts np.abs(a - b) / (np.abs(a)*1e4) < max_relative_error, + # which is the same as np.abs(a - b) / np.abs(a) < max_relative_error*1e4. 
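To make the rescaling described above concrete, here is a minimal numeric sketch (the gradient values are assumed, not taken from any real op): for a numeric gradient of magnitude 5e-9 the denominator is boosted by 1e4, so an absolute error of 1e-13 stays under the fp64 limit of 1e-7 even though the raw relative error would not.

    import numpy as np

    max_relative_error = 1e-7              # fp64 limit mentioned in the comment above
    a = np.array([5e-9])                   # numeric gradient, 1e-10 < |a| <= 1e-8
    b = np.array([5e-9 + 1e-13])           # analytic gradient with a tiny absolute error

    raw = np.abs(a - b) / np.abs(a)        # ~2e-5, would exceed the 1e-7 limit
    abs_a = np.abs(a)
    abs_a[np.logical_and(abs_a > 1e-10, abs_a <= 1e-8)] *= 1e4   # same rescaling as below
    scaled = np.abs(a - b) / abs_a         # ~2e-9, passes the check
    assert raw.max() > max_relative_error > scaled.max()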
+ abs_a = np.abs(a) + if abs_a.ndim > 0: + if ( + self.dtype == np.float64 + and self.op_type + not in op_threshold_white_list.NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST + ): + abs_a[abs_a < 1e-10] = 1e-3 + abs_a[np.logical_and(abs_a > 1e-10, abs_a <= 1e-8)] *= 1e4 + abs_a[np.logical_and(abs_a > 1e-8, abs_a <= 1e-6)] *= 1e2 + elif self.is_bfloat16_op(): + abs_a[abs_a < 1e-2] = 1 + else: + abs_a[abs_a < 1e-3] = 1 + elif abs_a.ndim == 0: + if ( + self.dtype == np.float64 + and self.op_type + not in op_threshold_white_list.NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST + ): + if abs_a < 1e-10: + abs_a = 1e-3 + elif abs_a > 1e-10 and abs_a <= 1e-8: + abs_a = abs_a * 1e4 + elif abs_a > 1e-8 and abs_a <= 1e-6: + abs_a = abs_a * 1e2 + elif self.is_bfloat16_op(): + abs_a = 1 if abs_a < 1e-2 else abs_a + else: + abs_a = 1 if abs_a < 1e-3 else abs_a + + diff_mat = np.abs(a - b) / abs_a + max_diff = np.max(diff_mat) + + def err_msg(): + offset = np.argmax(diff_mat > max_relative_error) + return ( + "Operator %s error, %s variable %s (shape: %s, dtype: %s) max gradient diff %e over limit %e, " + "the first error element is %d, expected %e, but got %e." + ) % ( + self.op_type, + msg_prefix, + name, + str(a.shape), + self.dtype, + max_diff, + max_relative_error, + offset, + a.flatten()[offset], + b.flatten()[offset], + ) + + self.assertLessEqual(max_diff, max_relative_error, err_msg()) + + def _check_grad_helper(self): + self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs) + self.__class__.op_type = self.op_type + self.__class__.exist_check_grad = True + if self.dtype == np.float64: + self.__class__.exist_fp64_check_grad = True + + def check_grad( + self, + inputs_to_check, + output_names, + no_grad_set=None, + numeric_grad_delta=0.005, + in_place=False, + max_relative_error=0.005, + user_defined_grads=None, + user_defined_grad_outputs=None, + check_dygraph=True, + ): + + self._check_grad_helper() + places = self._get_places() + for place in places: + self.check_grad_with_place( + place, + inputs_to_check, + output_names, + no_grad_set, + numeric_grad_delta, + in_place, + max_relative_error, + user_defined_grads, + user_defined_grad_outputs, + check_dygraph=check_dygraph, + ) + + def check_grad_with_place( + self, + place, + inputs_to_check, + output_names, + no_grad_set=None, + numeric_grad_delta=0.005, + in_place=False, + max_relative_error=0.005, + user_defined_grads=None, + user_defined_grad_outputs=None, + check_dygraph=True, + numeric_place=None, + ): + + self.scope = core.Scope() + op_inputs = self.inputs if hasattr(self, "inputs") else dict() + op_outputs = self.outputs if hasattr(self, "outputs") else dict() + op_attrs = self.attrs if hasattr(self, "attrs") else dict() + + self._check_grad_helper() + if self.is_bfloat16_op() and self.is_mkldnn_op(): + check_dygraph = False + + if ( + self.dtype == np.float64 + and self.op_type + not in op_threshold_white_list.NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST + ): + numeric_grad_delta = 1e-5 + max_relative_error = 1e-7 + + cache_list = None + if hasattr(self, "cache_name_list"): + cache_list = self.cache_name_list + + # oneDNN numeric gradient should use CPU kernel + use_onednn = False + if "use_mkldnn" in op_attrs and op_attrs["use_mkldnn"]: + op_attrs["use_mkldnn"] = False + use_onednn = True + + self.op = create_op( + self.scope, + self.op_type, + op_inputs, + op_outputs, + op_attrs, + cache_list=cache_list, + ) + + if use_onednn: + op_attrs["use_mkldnn"] = True + + if no_grad_set is None: + no_grad_set = set() + else: + if ( + (self.op_type 
not in no_grad_set_white_list.NEED_TO_FIX_OP_LIST) + and ( + self.op_type not in no_grad_set_white_list.NOT_CHECK_OP_LIST + ) + and (not self.is_bfloat16_op()) + ): + raise AssertionError( + "no_grad_set must be None, op_type is " + + self.op_type + + " Op." + ) + + for input_to_check in inputs_to_check: + set_input(self.scope, self.op, self.inputs, place) + tensor_to_check = self.scope.find_var(input_to_check).get_tensor() + tensor_size = functools.reduce( + lambda a, b: a * b, tensor_to_check.shape(), 1 + ) + tensor_ndim = len(tensor_to_check.shape()) + # for 0D Tensor, it's additional case for OP, so not raise error + if tensor_ndim > 0 and tensor_size < 100: + self.__class__.input_shape_is_large = False + + if not type(output_names) is list: + output_names = [output_names] + + if numeric_place is None: + numeric_place = place + + numeric_grads = user_defined_grads or [ + get_numeric_gradient( + numeric_place, + self.scope, + self.op, + self.inputs, + input_to_check, + output_names, + delta=numeric_grad_delta, + in_place=in_place, + ) + for input_to_check in inputs_to_check + ] + analytic_grads = self._get_gradient( + inputs_to_check, + place, + output_names, + no_grad_set, + user_defined_grad_outputs, + ) + # comparison of bf16 results will happen as fp32 + # loop over list of grads and convert bf16 to fp32 + fp32_analytic_grads = [] + for grad in analytic_grads: + if grad.dtype == np.uint16: + grad = convert_uint16_to_float(grad) + max_relative_error = ( + 0.04 if max_relative_error < 0.04 else max_relative_error + ) + fp32_analytic_grads.append(grad) + analytic_grads = fp32_analytic_grads + + fp32_numeric_grads = [] + for grad in numeric_grads: + if grad.dtype == np.uint16: + grad = convert_uint16_to_float(grad) + max_relative_error = ( + 0.04 if max_relative_error < 0.04 else max_relative_error + ) + fp32_numeric_grads.append(grad) + numeric_grads = fp32_numeric_grads + + self._assert_is_close( + numeric_grads, + analytic_grads, + inputs_to_check, + max_relative_error, + "Gradient Check On %s" % str(place), + ) + + if check_dygraph: + with fluid.dygraph.base.guard(place): + dygraph_dygraph_grad = self._get_dygraph_grad( + inputs_to_check, + place, + output_names, + user_defined_grad_outputs, + no_grad_set, + check_dygraph, + ) + fp32_grads = [] + for grad in dygraph_dygraph_grad: + if grad.dtype == np.uint16: + grad = convert_uint16_to_float(grad) + max_relative_error = ( + 0.03 + if max_relative_error < 0.03 + else max_relative_error + ) + fp32_grads.append(grad) + dygraph_dygraph_grad = fp32_grads + self._assert_is_close( + numeric_grads, + dygraph_dygraph_grad, + inputs_to_check, + max_relative_error, + "Gradient Check On %s" % str(place), + ) + + def _find_var_in_dygraph(self, output_vars, name): + if name in output_vars: + return output_vars[name] + else: + for output_vars_index in output_vars: + for output_vars_selected in output_vars[output_vars_index]: + if output_vars_selected.name == name: + return output_vars_selected + + def _get_dygraph_grad( + self, + inputs_to_check, + place, + output_names, + user_defined_grad_outputs=None, + no_grad_set=None, + check_dygraph=True, + ): + with fluid.dygraph.base.guard(place=place): + block = fluid.default_main_program().global_block() + + op_proto = OpProtoHolder.instance().get_op_proto(self.op_type) + + # prepare input variable + inputs, inputs_grad_dict = self.append_input_output_for_dygraph( + op_proto, self.inputs, True, True, block + ) + + # prepare output variable + outputs = self.append_input_output_for_dygraph( + op_proto, 
self.outputs, False, False, block + ) + + # prepare attributes + attrs_outputs = {} + if hasattr(self, "attrs"): + for attrs_name in self.attrs: + if self.attrs[attrs_name] is not None: + attrs_outputs[attrs_name] = self.attrs[attrs_name] + + if check_dygraph: + dygraph_outputs = self._calc_python_api_output( + place, inputs, outputs + ) + # if outputs is None, kernel sig is empty or other error is happens. + if not check_dygraph or dygraph_outputs is None: + block.append_op( + type=self.op_type, + inputs=inputs, + outputs=outputs, + attrs=attrs_outputs if hasattr(self, "attrs") else None, + ) + else: + outputs = dygraph_outputs + + if self.dtype == np.uint16: + cast_inputs = self._find_var_in_dygraph( + outputs, output_names[0] + ) + cast_outputs = block.create_var( + dtype="float32", shape=cast_inputs[0].shape + ) + cast_op = block.append_op( + inputs={"X": cast_inputs}, + outputs={"Out": cast_outputs}, + type="cast", + attrs={ + "in_dtype": core.VarDesc.VarType.BF16, + "out_dtype": core.VarDesc.VarType.FP32, + }, + ) + outputs = {output_names[0]: cast_outputs} + + outputs_valid = {} + for output_name in output_names: + outputs_valid[output_name] = self._find_var_in_dygraph( + outputs, output_name + ) + + if user_defined_grad_outputs is None: + if len(outputs_valid) == 1: + loss = block.create_var( + dtype=self.dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=False, + shape=[1], + ) + for outputs_valid_key in outputs_valid: + block.append_op( + type="mean", + inputs={"X": outputs_valid[outputs_valid_key]}, + outputs={"Out": [loss]}, + attrs=None, + ) + else: + avg_sum = [] + for cur_loss in outputs_valid: + cur_avg_loss = block.create_var( + dtype=self.dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=False, + ) + block.append_op( + type="mean", + inputs={"X": outputs_valid[cur_loss]}, + outputs={"Out": [cur_avg_loss]}, + attrs=None, + ) + avg_sum.append(cur_avg_loss) + loss_sum = block.create_var( + dtype=self.dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=False, + shape=[1], + ) + block.append_op( + type='sum', + inputs={"X": avg_sum}, + outputs={"Out": loss_sum}, + attrs=None, + ) + loss = block.create_var( + dtype=self.dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=False, + shape=[1], + ) + block.append_op( + type='scale', + inputs={"X": loss_sum}, + outputs={"Out": loss}, + attrs={'scale': 1.0 / float(len(avg_sum))}, + ) + loss.backward() + + fetch_list_grad = [] + for inputs_to_check_name in inputs_to_check: + a = inputs_grad_dict[inputs_to_check_name].gradient() + fetch_list_grad.append(a) + return fetch_list_grad + else: + # user_defined_grad_outputs here are numpy arrays + if not isinstance(user_defined_grad_outputs, list): + user_defined_grad_outputs = [user_defined_grad_outputs] + grad_outputs = [] + for grad_out_value in user_defined_grad_outputs: + grad_outputs.append(paddle.to_tensor(grad_out_value)) + # delete the inputs which no need to calculate grad + for no_grad_val in no_grad_set: + del inputs[no_grad_val] + + if in_dygraph_mode(): + core.eager.run_backward( + fluid.layers.utils.flatten(outputs), grad_outputs, False + ) + grad_inputs = [] + for inputs_list in inputs.values(): + for inp in inputs_list: + grad_inputs.append(inp.grad.numpy()) + return grad_inputs + else: + grad_inputs = paddle.grad( + outputs=fluid.layers.utils.flatten(outputs), + inputs=fluid.layers.utils.flatten(inputs), + grad_outputs=grad_outputs, + ) + return 
[grad.numpy() for grad in grad_inputs] + + @staticmethod + def _numpy_to_lod_tensor(np_value, lod, place): + tensor = core.LoDTensor() + tensor.set(np_value, place) + if lod is not None: + tensor.set_recursive_sequence_lengths(lod) + return tensor + + @staticmethod + def np_dtype_to_fluid_dtype(input): + return input + + @staticmethod + def fluid_dtype_to_np_dtype(self, dtype): + return dtype + + @staticmethod + def np_value_to_fluid_value(input): + return input + + def _get_gradient( + self, + input_to_check, + place, + output_names, + no_grad_set, + user_defined_grad_outputs=None, + parallel=False, + ): + prog = Program() + scope = core.Scope() + block = prog.global_block() + self._append_ops(block) + + inputs = self._get_inputs(block) + outputs = self._get_outputs(block) + feed_dict = self.feed_var(inputs, place) + + if user_defined_grad_outputs is None: + if self.dtype == np.uint16: + cast_inputs = list(map(block.var, output_names)) + cast_outputs = block.create_var( + dtype="float32", shape=cast_inputs[0].shape + ) + cast_op = block.append_op( + inputs={"X": cast_inputs}, + outputs={"Out": cast_outputs}, + type="cast", + attrs={ + "in_dtype": core.VarDesc.VarType.BF16, + "out_dtype": core.VarDesc.VarType.FP32, + }, + ) + cast_op.desc.infer_var_type(block.desc) + cast_op.desc.infer_shape(block.desc) + output_names = [cast_outputs.name] + loss = append_loss_ops(block, output_names) + param_grad_list = append_backward( + loss=loss, + parameter_list=input_to_check, + no_grad_set=no_grad_set, + ) + fetch_list = [g for p, g in param_grad_list] + else: + assert ( + parallel is False + ), "unsupported parallel mode when giving custom grad outputs." + # user_defined_grad_outputs here are numpy arrays + if not isinstance(user_defined_grad_outputs, list): + user_defined_grad_outputs = [user_defined_grad_outputs] + grad_outputs = [] + for grad_out_value in user_defined_grad_outputs: + # `presistable` is used to avoid executor create new var in local scope + var = block.create_var( + shape=grad_out_value.shape, + dtype=grad_out_value.dtype, + persistable=True, + ) + true_var = scope.var(var.name) + tensor = true_var.get_tensor() + tensor.set(grad_out_value, place) + grad_outputs.append(var) + targets = [ + outputs[name] for name in outputs if name in output_names + ] + inputs = [inputs[name] for name in input_to_check if name in inputs] + grad_inputs = paddle.static.gradients( + targets, inputs, grad_outputs, no_grad_set + ) + fetch_list = grad_inputs + + if parallel: + use_cuda = False + if isinstance(place, fluid.CUDAPlace): + use_cuda = True + compiled_prog = fluid.CompiledProgram(prog).with_data_parallel( + loss_name=loss.name, places=place + ) + prog = compiled_prog + executor = fluid.Executor(place) + return list( + map( + np.array, + executor.run( + prog, feed_dict, fetch_list, scope=scope, return_numpy=False + ), + ) + ) + + +class OpTestTool: + @classmethod + def skip_if(cls, condition: object, reason: str): + return unittest.skipIf(condition, reason) + + @classmethod + def skip_if_not_cpu_bf16(cls): + return OpTestTool.skip_if( + not ( + isinstance(_current_expected_place(), core.CPUPlace) + and core.supports_bfloat16() + ), + "Place does not support BF16 evaluation", + ) + + @classmethod + def skip_if_not_cpu(cls): + return OpTestTool.skip_if( + not isinstance(_current_expected_place(), core.CPUPlace), + "OneDNN supports only CPU for now", + ) diff --git a/python/paddle/fluid/tests/unittests/fft/test_fft.py b/python/paddle/fluid/tests/unittests/fft/test_fft.py index 
1b42badd1481a34b844e431b38e5621e38c6002f..8a57fa81b5729467bcd0afbf37a7e766ed4cf987 100644 --- a/python/paddle/fluid/tests/unittests/fft/test_fft.py +++ b/python/paddle/fluid/tests/unittests/fft/test_fft.py @@ -1823,6 +1823,23 @@ class TestFftFreq(unittest.TestCase): ) +@place(DEVICES) +@parameterize( + (TEST_CASE_NAME, 'n', 'd', 'dtype', 'expect_exception'), + [ + ('test_with_0_0', 0, 0, 'float32', ValueError), + ('test_with_n_0', 20, 0, 'float32', ValueError), + ('test_with_0_d', 0, 20, 'float32', ValueError), + ], +) +class TestFftFreqException(unittest.TestCase): + def test_fftfreq2(self): + """Test fftfreq with d = 0""" + with paddle.fluid.dygraph.guard(self.place): + with self.assertRaises(self.expect_exception): + paddle.fft.fftfreq(self.n, self.d, self.dtype) + + @place(DEVICES) @parameterize( (TEST_CASE_NAME, 'n', 'd', 'dtype'), diff --git a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py index 8f803ce1db1424826fdbed39f67b158d1f6d56db..917beec752d2aa080614d04d8aeadd010a90308b 100644 --- a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py +++ b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py @@ -41,26 +41,23 @@ def net(batch_size=4, lr=0.01): dnn_input_dim, lr_input_dim = int(2), int(2) with fluid.device_guard("cpu"): - dnn_data = fluid.layers.data( + dnn_data = paddle.static.data( name="dnn_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - lr_data = fluid.layers.data( + lr_data = paddle.static.data( name="lr_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - label = fluid.layers.data( + label = paddle.static.data( name="click", shape=[-1, 1], dtype="float32", lod_level=0, - append_batch_size=False, ) datas = [dnn_data, lr_data, label] diff --git a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py index 628953391c059fd4b4b5652acc8cbdaa79213ffe..e17a9604fc5c6f9951297d4924d6a87dc5b6d7d6 100755 --- a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py +++ b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py @@ -55,11 +55,11 @@ class TestFleetMetaOptimizer(unittest.TestCase): with fluid.unique_name.guard(): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_y = paddle.static.data( + name="y", shape=[-1, 1], dtype='int64' ) fc_1 = paddle.static.nn.fc( @@ -92,11 +92,11 @@ class TestFleetMetaOptimizer(unittest.TestCase): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) with fluid.device_guard("gpu:0"): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' + input_x = paddle.static.data( + name="x", shape=[-1, 32], dtype='float32' ) - input_y = paddle.fluid.layers.data( - name="y", shape=[1], dtype='int64' + input_y = paddle.static.data( + name="y", shape=[-1, 1], dtype='int64' ) for stage_idx in range(pp_degree): diff --git a/python/paddle/fluid/tests/unittests/fleet_ps_training.py b/python/paddle/fluid/tests/unittests/fleet_ps_training.py index be097ad3ac530f36f944d89cd13d62f846743187..9b5ccf7c99f3af6807a9a50b364d5b66321e03da 100644 --- a/python/paddle/fluid/tests/unittests/fleet_ps_training.py +++ 
b/python/paddle/fluid/tests/unittests/fleet_ps_training.py @@ -15,14 +15,15 @@ from nets import mlp from utils import gen_data +import paddle import paddle.fluid as fluid from paddle.fluid.incubate.fleet.base import role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import ( fleet, ) -input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') -input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') +input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') +input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') input_y = fluid.layers.cast(input_y, dtype="float32") with fluid.device_guard("gpu"): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cast.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cast.py index 25d5e3a0c9cafa49327236d916917561023dcf41..08f5756a4fa87f297bbc975911095bc093d0c591 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cast.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_cast.py @@ -29,9 +29,9 @@ class TrtConvertCastTest(TrtLayerAutoScanTest): attrs = [ program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - if attrs[0]['in_dtype'] not in [0, 1, 2, 4, 5] or attrs[0][ + if attrs[0]['in_dtype'] not in [0, 1, 2, 3, 4, 5] or attrs[0][ 'out_dtype' - ] not in [0, 1, 2, 4, 5]: + ] not in [0, 1, 2, 3, 4, 5]: return False compile_version = paddle_infer.get_trt_compile_version() runtime_version = paddle_infer.get_trt_runtime_version() @@ -55,8 +55,14 @@ class TrtConvertCastTest(TrtLayerAutoScanTest): def generate_input(type): return np.ones([1, 3, 64, 64]).astype(type) - for in_dtype in [np.bool_, np.int32, np.float32, np.float64]: - for out_dtype in [np.bool_, np.int32, np.float32, np.float64]: + for in_dtype in [np.bool_, np.int32, np.float32, np.float64, np.int64]: + for out_dtype in [ + np.bool_, + np.int32, + np.float32, + np.float64, + np.int64, + ]: self.has_bool_dtype = (in_dtype == np.bool_) or ( out_dtype == np.bool_ ) diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py index ba5e304b61155765523c418eac2157fd77b90108..8f6c8592423047e40abcc4258a51aaad43c95eb5 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py @@ -25,23 +25,20 @@ import paddle.fluid.core as core class EmbEltwiseLayerNormFusePassTest(PassTest): def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): - word_id = fluid.layers.data( + word_id = paddle.static.data( name="word_id", shape=[1, 128, 1], dtype="int64", - append_batch_size=False, ) - pos_id = fluid.layers.data( + pos_id = paddle.static.data( name="pos_id", shape=[1, 128, 1], dtype="int64", - append_batch_size=False, ) - sent_id = fluid.layers.data( + sent_id = paddle.static.data( name="sent_id", shape=[1, 128, 1], dtype="int64", - append_batch_size=False, ) word_emb = fluid.layers.embedding( input=word_id, size=(128, 768), dtype='float32' @@ -56,29 +53,25 @@ class EmbEltwiseLayerNormFusePassTest(PassTest): add2 = paddle.add(add1, sent_emb) hidden1 = paddle.static.nn.layer_norm(input=add2, begin_norm_axis=2) - id1 = fluid.layers.data( + id1 = paddle.static.data( name="id1", shape=[1, 128, 1], dtype="int64", - append_batch_size=False, ) - id2 = 
fluid.layers.data( + id2 = paddle.static.data( name="id2", shape=[1, 128, 1], dtype="int64", - append_batch_size=False, ) - id3 = fluid.layers.data( + id3 = paddle.static.data( name="id3", shape=[1, 128, 1], dtype="int64", - append_batch_size=False, ) - id4 = fluid.layers.data( + id4 = paddle.static.data( name="id4", shape=[1, 128, 1], dtype="int64", - append_batch_size=False, ) emb1 = fluid.layers.embedding( input=id1, size=(128, 768), dtype='float32' diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py b/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py index 21697177d0dd626f9f81c555e8dca2dce4bfe3a2..6e0bb08c926b3180d64812e66db403b8751c4bbb 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_subgraph_python_interface.py @@ -28,10 +28,12 @@ paddle.enable_static() class TestQuantizationSubGraph(unittest.TestCase): def build_graph_with_sub_graph(self): def linear_fc(num): - data = fluid.layers.data( - name='image', shape=[1, 32, 32], dtype='float32' + data = paddle.static.data( + name='image', shape=[-1, 1, 32, 32], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') hidden = data for _ in range(num): hidden = paddle.static.nn.fc( diff --git a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py index 9f7b2d8533573e8f68afe9e49477790e0c80e6de..ae2870af8315b06c56854a7f53d160c9216d9f15 100644 --- a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py +++ b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py @@ -60,11 +60,11 @@ class BuildIrMemOptBase(unittest.TestCase): fluid.default_startup_program().random_seed = 100 fluid.default_main_program().random_seed = 100 - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1 + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 ) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") cost = network(data, label, len(self.word_dict)) optimizer = fluid.optimizer.Adam(learning_rate=0.001) diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_allgather_api.py b/python/paddle/fluid/tests/unittests/mlu/collective_allgather_api.py index 1d8d11ad1120008c67ef497b698608a4c6b9233e..91c839e9ea5310deef48a73571bcd2ec4df79e0b 100755 --- a/python/paddle/fluid/tests/unittests/mlu/collective_allgather_api.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_allgather_api.py @@ -45,8 +45,8 @@ class TestCollectiveAllgatherAPI(TestCollectiveAPIRunnerBase): def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): tensor_list = [] - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) paddle.distributed.all_gather(tensor_list, tindata) return tensor_list diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_allgather_op.py b/python/paddle/fluid/tests/unittests/mlu/collective_allgather_op.py index 8040e834d65a90b7f6e883fff481008963f084f7..33d9f05cf7b2fa6603f99c6d974a2aeb88ebdf83 100755 --- a/python/paddle/fluid/tests/unittests/mlu/collective_allgather_op.py +++ 
b/python/paddle/fluid/tests/unittests/mlu/collective_allgather_op.py @@ -42,9 +42,10 @@ class TestCollectiveAllgather(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) toutdata = main_prog.current_block().create_var( name="outofallgather", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_api.py b/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_api.py index 22ca990c55afde93e2b28ab1167785e6e0559f49..a636dbb4692f6d721c83cbfd20bc60376b6cdede 100644 --- a/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_api.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_api.py @@ -44,8 +44,8 @@ class TestCollectiveAllreduceAPI(TestCollectiveAPIRunnerBase): def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) paddle.distributed.all_reduce(tindata) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_op.py b/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_op.py index 609d9e7c41688e40a16b8216a8a69bc9c4b7815c..2d9f6ee03e0a836e18b626aaf2ab1ecc7eafe521 100644 --- a/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_allreduce_op.py @@ -42,9 +42,11 @@ class TestCollectiveAllreduce(TestCollectiveRunnerBase): def get_model(self, main_prog, startup_program, col_type): ring_id = 0 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) + toutdata = main_prog.current_block().create_var( name="outof" + col_type, dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_api.py b/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_api.py index 0c1ae572251aa6dc579090ead3433b73178f79c5..673ffe67a0f7b9a3744e48b85e36db8cafa46e87 100644 --- a/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_api.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_api.py @@ -44,9 +44,11 @@ class TestCollectiveBroadcastAPI(TestCollectiveAPIRunnerBase): def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype="float32" + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype="float32" ) + tindata.desc.set_need_check_feed(False) + paddle.distributed.broadcast(tindata, src=1) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_op.py b/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_op.py index 982d5d204fccc23d0a66bbb605135270c51534e4..ed42a7964ea1a8589efb148a360eb32d4826bac8 100755 --- a/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_op.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_broadcast_op.py @@ -43,9 +43,11 @@ class TestCollectiveBroadcast(TestCollectiveRunnerBase): 
ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) + toutdata = main_prog.current_block().create_var( name="outofbroadcast", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_reduce_api.py b/python/paddle/fluid/tests/unittests/mlu/collective_reduce_api.py index 33a0d9d0d51a7efb0724c7d04c80e32a2f3c0480..2121aaee2bcd8da9113cab65dd602c2ef868cc1c 100644 --- a/python/paddle/fluid/tests/unittests/mlu/collective_reduce_api.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_reduce_api.py @@ -44,9 +44,11 @@ class TestCollectiveReduceAPI(TestCollectiveAPIRunnerBase): def get_model(self, main_prog, startup_program, rank): with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) + paddle.distributed.reduce(tindata, dst=0) return [tindata] diff --git a/python/paddle/fluid/tests/unittests/mlu/collective_reduce_op.py b/python/paddle/fluid/tests/unittests/mlu/collective_reduce_op.py index d9271376b0f3124427003d4ef90cc7a3ae87404a..f389cef9d6f8884ef0b43139b93bc9f96d72d2fa 100644 --- a/python/paddle/fluid/tests/unittests/mlu/collective_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/mlu/collective_reduce_op.py @@ -43,9 +43,11 @@ class TestCollectiveReduce(TestCollectiveRunnerBase): ring_id = 0 rootid = 1 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) + tindata.desc.set_need_check_feed(False) + toutdata = main_prog.current_block().create_var( name="outof" + col_type, dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/mlu/sync_batch_norm_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/sync_batch_norm_op_mlu.py index 20b3a00cc947b3139f236cf799d45f0e83b0efa8..8a9405c1fcb3e007214e04dce175519452188d35 100644 --- a/python/paddle/fluid/tests/unittests/mlu/sync_batch_norm_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/sync_batch_norm_op_mlu.py @@ -70,11 +70,10 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase): use_cudnn = False with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - data = fluid.layers.data( + data = paddle.static.data( name='input', shape=self.dshape, dtype=self.dtype, - append_batch_size=False, ) conv = paddle.static.nn.conv2d( input=data, diff --git a/python/paddle/fluid/tests/unittests/mlu/test_accuracy_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_accuracy_op_mlu.py index a32765b8d306894bdfe843330337ae5536f8ed5b..644a2e81dbffa4d1e2bf6a4c9648bb7951b8c57f 100755 --- a/python/paddle/fluid/tests/unittests/mlu/test_accuracy_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_accuracy_op_mlu.py @@ -71,16 +71,16 @@ class TestAccuracyOpError(unittest.TestCase): x1 = fluid.create_lod_tensor( np.array([[-1]]), [[1]], fluid.MLUPlace(0) ) - label = fluid.layers.data( + label = paddle.static.data( name='label', shape=[-1, 1], dtype="int32" ) self.assertRaises(TypeError, paddle.static.accuracy, x1, label) self.assertRaises(TypeError, paddle.metric.accuracy, x1, label) # The 
input dtype of accuracy_op must be float32 or float64. - x2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") + x2 = paddle.static.data(name='x2', shape=[-1, 4], dtype="int32") self.assertRaises(TypeError, paddle.static.accuracy, x2, label) self.assertRaises(TypeError, paddle.metric.accuracy, x2, label) - x3 = fluid.layers.data(name='input', shape=[-1, 2], dtype="float16") + x3 = paddle.static.data(name='input', shape=[-1, 2], dtype="float16") paddle.static.accuracy(input=x3, label=label) paddle.metric.accuracy(input=x3, label=label) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py index 66876ddb7929452ddac4225e4095cc11fdd6442c..abd86efcf84d08017172a5af16844b2985c2e379 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu.py @@ -745,7 +745,7 @@ class TestBatchNormOpError(unittest.TestCase): # the input dtype of batch_norm must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") + x2 = paddle.static.data(name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32") self.assertRaises(TypeError, paddle.static.nn.batch_norm, x2) @@ -761,7 +761,7 @@ class TestDygraphBatchNormAPIError(unittest.TestCase): # the input dtype of BatchNorm must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") + x2 = paddle.static.data(name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32") self.assertRaises(TypeError, batch_norm, x2) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_compare_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_compare_op_mlu.py index f994fb185f64efe22d34ebb75688e6a1c2408d70..568d03acd4e638aa7c621b7efed622e3b668a1b0 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_compare_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_compare_op_mlu.py @@ -44,9 +44,9 @@ def create_test_class(op_type, typename, callback): def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): - a = fluid.layers.data(name='a', shape=[2], dtype='float32') - b = fluid.layers.data(name='b', shape=[2], dtype='float32') - c = fluid.layers.data(name='c', shape=[2], dtype='int16') + a = paddle.static.data(name='a', shape=[-1, 2], dtype='float32') + b = paddle.static.data(name='b', shape=[-1, 2], dtype='float32') + c = paddle.static.data(name='c', shape=[-1, 2], dtype='int16') d = fluid.create_lod_tensor(np.array([[-1]]), [[1]], self.place) op = eval("fluid.layers.%s" % self.op_type) @@ -134,8 +134,8 @@ def create_test_class(op_type, typename, callback): def test_attr_name(self): paddle.enable_static() with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[4], dtype=typename) - y = fluid.layers.data(name='y', shape=[4], dtype=typename) + x = paddle.static.data(name='x', shape=[-1, 4], dtype=typename) + y = paddle.static.data(name='y', shape=[-1, 4], dtype=typename) op = eval("paddle.%s" % (self.op_type)) out = op(x=x, y=y, name="name_%s" % (self.op_type)) self.assertEqual("name_%s" % (self.op_type) in out.name, True) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_conv2d_transposed_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_conv2d_transposed_op_mlu.py index a24206e0900b37e949ba6a018c30ae36b9d86d70..b4329068c62e0a3ffdc3805377fa085737cf0716 100644 --- 
a/python/paddle/fluid/tests/unittests/mlu/test_conv2d_transposed_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_conv2d_transposed_op_mlu.py @@ -493,11 +493,11 @@ class TestConv2DTransposeAPI(unittest.TestCase): self.place = paddle.device.MLUPlace(0) def test_case1(self): - data1 = fluid.layers.data( - name='data1', shape=[3, 5, 5], dtype='float32' + data1 = paddle.static.data( + name='data1', shape=[-1, 3, 5, 5], dtype='float32' ) - data2 = fluid.layers.data( - name='data2', shape=[5, 5, 3], dtype='float32' + data2 = paddle.static.data( + name='data2', shape=[-1, 5, 5, 3], dtype='float32' ) out1 = paddle.static.nn.conv2d_transpose( input=data1, @@ -583,7 +583,7 @@ class TestConv2DTransposeOpException(unittest.TestCase): self.place = paddle.device.MLUPlace(0) def test_exception(self): - data = fluid.layers.data(name='data', shape=[3, 5, 5], dtype="float32") + data = paddle.static.data(name='data', shape=[-1, 3, 5, 5], dtype="float32") def attr_data_format(): out = paddle.static.nn.conv2d_transpose( @@ -630,8 +630,8 @@ class TestConv2DTransposeOpException(unittest.TestCase): self.assertRaises(ValueError, attr_padding_with_data_format) - error_input = fluid.layers.data( - name='error_data', shape=[1], dtype="float32" + error_input = paddle.static.data( + name='error_data', shape=[-1, 1], dtype="float32" ) def error_input_size(): diff --git a/python/paddle/fluid/tests/unittests/mlu/test_expand_as_v2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_expand_as_v2_op_mlu.py index 0a88f759831e4672f7afec6d4a12796f97c2311f..5d7b39925665c20675f028b42a4a0347e2825dfe 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_expand_as_v2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_expand_as_v2_op_mlu.py @@ -137,14 +137,13 @@ class TestExpandAsV2API(unittest.TestCase): def test_api(self): input1 = np.random.random([12, 14]).astype("float32") input2 = np.random.random([2, 12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32" + x = paddle.static.data( + name='x', shape=[12, 14], dtype="float32" ) - y = fluid.layers.data( + y = paddle.static.data( name='target_tensor', shape=[2, 12, 14], - append_batch_size=False, dtype="float32", ) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_expand_v2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_expand_v2_op_mlu.py index 46d3d9d214fd7d05a021892692ad7189a1a3690f..c2a3dbfa20944c75a671f9b6eda43684a95e68ee 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_expand_v2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_expand_v2_op_mlu.py @@ -220,9 +220,9 @@ class TestExpandV2Error(unittest.TestCase): ) shape = [2, 2] self.assertRaises(TypeError, paddle.tensor.expand, x1, shape) - x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") + x2 = paddle.static.data(name='x2', shape=[-1, 4], dtype="uint8") self.assertRaises(TypeError, paddle.tensor.expand, x2, shape) - x3 = fluid.layers.data(name='x3', shape=[4], dtype="bool") + x3 = paddle.static.data(name='x3', shape=[-1, 4], dtype="bool") x3.stop_gradient = False self.assertRaises(ValueError, paddle.tensor.expand, x3, shape) @@ -231,15 +231,14 @@ class TestExpandV2Error(unittest.TestCase): class TestExpandV2API(unittest.TestCase): def test_api(self): input = np.random.random([12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32" + x = paddle.static.data( + name='x', shape=[12, 14], dtype="float32" ) positive_2 = 
fluid.layers.fill_constant([1], "int32", 12) - expand_shape = fluid.layers.data( + expand_shape = paddle.static.data( name="expand_shape", shape=[2], - append_batch_size=False, dtype="int32", ) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_fill_constant_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_fill_constant_op_mlu.py index 7a19779b049ce4d8874f1e4559b67b7149ba3cb4..6d67b6fc8f095a4c7387bad97809cadfb24e1b24 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_fill_constant_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_fill_constant_op_mlu.py @@ -391,7 +391,7 @@ class TestFillConstantOpError(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): # for ci coverage - x1 = fluid.layers.data(name='x1', shape=[1], dtype="int16") + x1 = paddle.static.data(name='x1', shape=[-1, 1], dtype="int16") self.assertRaises( TypeError, fluid.layers.fill_constant, @@ -411,7 +411,7 @@ class TestFillConstantOpError(unittest.TestCase): # The argument dtype of fill_constant_op must be one of bool, float16, # float32, float64, uint8, int16, int32 or int64 - x2 = fluid.layers.data(name='x2', shape=[1], dtype="int32") + x2 = paddle.static.data(name='x2', shape=[-1, 1], dtype="int32") self.assertRaises( TypeError, diff --git a/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py index 6e095c9b85144c7b61f3bebf8b4269f6df80489b..e9d248928556b9eacddfb564ed4a4a8a55aeacdc 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_layer_norm_op_mlu.py @@ -245,11 +245,10 @@ class TestLayerNormOp(unittest.TestCase): class TestLayerNormAPI(unittest.TestCase): def test_case(self): - x = fluid.layers.data( + x = paddle.static.data( name='x', shape=[64, 32, 256], dtype='float32', - append_batch_size=False, ) x = paddle.static.nn.layer_norm( x, @@ -291,7 +290,7 @@ class TestDygraphLayerNormAPIError(unittest.TestCase): self.assertRaises(TypeError, layer_norm, x1) # the input dtype of LayerNorm must be float32 or float16 - x2 = fluid.layers.data(name='x2', shape=[3, 32, 32], dtype="int32") + x2 = paddle.static.data(name='x2', shape=[-1, 3, 32, 32], dtype="int32") self.assertRaises(TypeError, layer_norm, x2) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_log_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_log_op_mlu.py index 11df0949a6f47151ee38c4efd8ff162be90b1e7d..c167ac67cc436102ad9f50f4953f9eb052a3a17e 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_log_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_log_op_mlu.py @@ -70,11 +70,11 @@ class TestLog(TestActivation): self.outputs = {'Out': out} def test_error(self): - in1 = fluid.layers.data( - name="in1", shape=[11, 17], append_batch_size=False, dtype="int32" + in1 = paddle.static.data( + name="in1", shape=[11, 17], dtype="int32" ) - in2 = fluid.layers.data( - name="in2", shape=[11, 17], append_batch_size=False, dtype="int64" + in2 = paddle.static.data( + name="in2", shape=[-1, 11, 17], dtype="int64" ) self.assertRaises(TypeError, fluid.layers.log, in1) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py index 5e5e517878eb3dbece3cc924e416f0cbce80418d..3a757e9cb66d86547095a874451e676eeb88f82f 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py +++ 
b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py @@ -140,8 +140,8 @@ class TestMomentumV2(unittest.TestCase): place = fluid.MLUPlace(0) main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1) cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) @@ -265,8 +265,8 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase): place = fluid.MLUPlace(0) main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1) cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py index b28d191fa2c3cd6f686eced3f33039e1dad8bc71..dfe989fa83b7b4443a6d65b747c322e23864028a 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_one_hot_v2_op_mlu.py @@ -150,8 +150,8 @@ class TestOneHotOp_exception(unittest.TestCase): def test_check_output(self): program = Program() with program_guard(program): - x = fluid.layers.data( - name='x', shape=[self.dimension], dtype='float32', lod_level=1 + x = paddle.static.data( + name='x', shape=[-1, self.dimension], dtype='float32', lod_level=1 ) block = program.current_block() one_hot_out = block.create_var( @@ -207,7 +207,7 @@ class TestOneHotOpApi(unittest.TestCase): # paddle.to_tensor(label), depth) def _run(self, depth): - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") one_hot_label = fluid.one_hot(input=label, depth=depth) label_data = np.array( @@ -234,10 +234,9 @@ class BadInputTestOnehotV2(unittest.TestCase): with fluid.program_guard(fluid.Program()): def test_bad_x(): - label = fluid.layers.data( + label = paddle.static.data( name="label", shape=[4], - append_batch_size=False, dtype="float32", ) one_hot_label = fluid.one_hot(input=label, depth=4) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py index 139d347e0d5da80d43c041314d865f6a480aad7c..4e4f0c56615d290b4b9103b7408f17348afb1637 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_pool2d_op_mlu.py @@ -705,31 +705,27 @@ class TestPool2DAPI(unittest.TestCase): x_NHWC = np.random.random([2, 5, 5, 3]).astype("float32") x_NCHW = np.random.random([2, 3, 5, 5]).astype("float32") - input_NHWC = fluid.layers.data( + input_NHWC = paddle.static.data( name="input_NHWC", shape=[2, 5, 5, 3], - append_batch_size=False, dtype="float32", ) - input_NCHW = fluid.layers.data( + input_NCHW = paddle.static.data( name="input_NCHW", shape=[2, 3, 5, 5], - append_batch_size=False, dtype="float32", ) - input_NHWC_negetive = fluid.layers.data( + input_NHWC_negetive = paddle.static.data( 
name="input_NHWC_negetive", shape=[2, -1, 5, 3], - append_batch_size=False, dtype="float32", ) - input_NCHW_negetive = fluid.layers.data( + input_NCHW_negetive = paddle.static.data( name="input_NCHW_negetive", shape=[2, 3, -1, -1], - append_batch_size=False, dtype="float32", ) @@ -930,10 +926,9 @@ class TestPool2DAPI(unittest.TestCase): class TestPool2DAPI_Error(unittest.TestCase): def test_api(self): - input_NHWC = fluid.layers.data( + input_NHWC = paddle.static.data( name="input_NHWC", shape=[2, 5, 5, 3], - append_batch_size=False, dtype="float32", ) ksize = [3, 3] diff --git a/python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py index 072b985613e0818075fa1d51583939665b0434a4..83d18b667385307ac36630a4f09e82e60de5b0c9 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_split_op_mlu.py @@ -94,7 +94,7 @@ class TestCase5(TestCase1): class API_TestSplit(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.layers.data('data', shape=[-1, 10], dtype='float32') + data = paddle.static.data('data', shape=[-1, 10], dtype='float32') x0, x1 = paddle.split(data, num_or_sections=(3, 7), axis=1) place = fluid.MLUPlace(0) exe = fluid.Executor(place) @@ -108,7 +108,7 @@ class API_TestSplit(unittest.TestCase): class API_TestSplit2(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.layers.data('data', shape=[-1, 10], dtype='float32') + data = paddle.static.data('data', shape=[-1, 10], dtype='float32') x0, x1 = paddle.split(data, num_or_sections=2, axis=1) place = fluid.MLUPlace(0) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_stack_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_stack_op_mlu.py index eefe1d7d691fcdc2325637d38349a50dc58f6c08..8bbd9e77897d9a505c668c2ce9b79c27dd784368 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_stack_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_stack_op_mlu.py @@ -120,9 +120,9 @@ class TestStackOpHalf(TestStackOpBase): class API_test(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.layers.data('data1', shape=[1, 2], dtype='float32') - data2 = fluid.layers.data('data2', shape=[1, 2], dtype='float32') - data3 = fluid.layers.data('data3', shape=[1, 2], dtype='float32') + data1 = paddle.static.data('data1', shape=[-1, 1, 2], dtype='float32') + data2 = paddle.static.data('data2', shape=[-1, 1, 2], dtype='float32') + data3 = paddle.static.data('data3', shape=[-1, 1, 2], dtype='float32') result_stack = paddle.stack([data1, data2, data3], axis=0) place = paddle.MLUPlace(0) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_strided_slice_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_strided_slice_op_mlu.py index 35069485054f58079924aee28321397dc17e8f51..07a9179c5a8b112d94eefde4e454f5ac6919a089 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_strided_slice_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_strided_slice_op_mlu.py @@ -527,20 +527,19 @@ class TestStridedSliceAPI(unittest.TestCase): input = np.random.random([3, 4, 5, 6]).astype("float32") minus_1 = fluid.layers.fill_constant([1], "int32", -1) minus_3 = fluid.layers.fill_constant([1], "int32", -3) - starts = fluid.layers.data( - name='starts', 
shape=[3], dtype='int32', append_batch_size=False + starts = paddle.static.data( + name='starts', shape=[3], dtype='int32' ) - ends = fluid.layers.data( - name='ends', shape=[3], dtype='int32', append_batch_size=False + ends = paddle.static.data( + name='ends', shape=[3], dtype='int32' ) - strides = fluid.layers.data( - name='strides', shape=[3], dtype='int32', append_batch_size=False + strides = paddle.static.data( + name='strides', shape=[3], dtype='int32' ) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[3, 4, 5, 6], - append_batch_size=False, dtype="float32", ) out_1 = paddle.strided_slice( diff --git a/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_extra.py b/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_extra.py index d9ec53af9b489d70614a8595891c16f02b92ca28..eb048e1ee60588e9b60957fddf553c3f6b648919 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_extra.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_sync_batch_norm_op_mlu_extra.py @@ -46,7 +46,7 @@ class TestDygraphSyncBatchNormAPIError(unittest.TestCase): self.assertRaises(TypeError, my_sync_batch_norm, x1) # the input dtype of SyncBatchNorm must be float16 or float32 - x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") + x2 = paddle.static.data(name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32") self.assertRaises(TypeError, my_sync_batch_norm, x2) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_tile_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_tile_op_mlu.py index 2f1a0d990bf01a262a72433d7d949c6d7e28d0ad..c3f4b8a755558f191d21aa869a7deb2bd165f104 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_tile_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_tile_op_mlu.py @@ -221,9 +221,9 @@ class TestTileError(unittest.TestCase): ) repeat_times = [2, 2] self.assertRaises(TypeError, paddle.tile, x1, repeat_times) - x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") + x2 = paddle.static.data(name='x2', shape=[-1, 4], dtype="uint8") self.assertRaises(TypeError, paddle.tile, x2, repeat_times) - x3 = fluid.layers.data(name='x3', shape=[4], dtype="bool") + x3 = paddle.static.data(name='x3', shape=[-1, 4], dtype="bool") x3.stop_gradient = False self.assertRaises(ValueError, paddle.tile, x3, repeat_times) @@ -232,7 +232,7 @@ class TestTileAPIStatic(unittest.TestCase): def test_api(self): with program_guard(Program(), Program()): repeat_times = [2, 2] - x1 = fluid.layers.data(name='x1', shape=[4], dtype="int32") + x1 = paddle.static.data(name='x1', shape=[-1, 4], dtype="int32") out = paddle.tile(x1, repeat_times) positive_2 = fluid.layers.fill_constant([1], dtype="int32", value=2) out2 = paddle.tile(x1, repeat_times=[positive_2, 2]) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py index a802f9da215b45e35636cf1a50f67d7c8ec508ea..60cd1e27c70ba36fd95800ffe68ba390cfacebcf 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py @@ -188,7 +188,7 @@ class TestTransposeOpError(unittest.TestCase): def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[10, 5, 3], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 10, 5, 3], dtype='float32') def test_x_Variable_check(): # the Input(x)'s type must be Variable diff --git 
a/python/paddle/fluid/tests/unittests/mlu/test_where_index_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_where_index_op_mlu.py index 877207eab7d2e620e1008814dfa44e3306680530..a12c4427a786d51d461befd98ad8903454398c60 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_where_index_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_where_index_op_mlu.py @@ -107,7 +107,7 @@ class TestRank3(TestWhereIndexOp): class TestWhereOpError(unittest.TestCase): def test_api(self): with program_guard(Program(), Program()): - cond = fluid.layers.data(name='cond', shape=[4], dtype='bool') + cond = paddle.static.data(name='cond', shape=[-1, 4], dtype='bool') result = paddle.nonzero(cond) exe = fluid.Executor(paddle.device.MLUPlace(0)) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_where_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_where_op_mlu.py index a2d0c1a699dba8b5e98e5d9020266be0e66289b3..a2fc6e2073c667ceab1692aea26a5b3b7b84e3da 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_where_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_where_op_mlu.py @@ -90,14 +90,14 @@ class TestWhereAPI(unittest.TestCase): for x_stop_gradient in [False, True]: for y_stop_gradient in [False, True]: with fluid.program_guard(Program(), Program()): - cond = fluid.layers.data( - name='cond', shape=self.shape, dtype='bool' + cond = paddle.static.data( + name='cond', shape=[-1] + self.shape, dtype='bool' ) - x = fluid.layers.data( - name='x', shape=self.shape, dtype='float32' + x = paddle.static.data( + name='x', shape=[-1] + self.shape, dtype='float32' ) - y = fluid.layers.data( - name='y', shape=self.shape, dtype='float32' + y = paddle.static.data( + name='y', shape=[-1] + self.shape, dtype='float32' ) x.stop_gradient = x_stop_gradient y.stop_gradient = y_stop_gradient @@ -137,8 +137,8 @@ class TestWhereAPI(unittest.TestCase): def test_api_broadcast(self, use_mlu=False): main_program = Program() with fluid.program_guard(main_program): - x = fluid.layers.data(name='x', shape=[4, 1], dtype='float32') - y = fluid.layers.data(name='y', shape=[4, 2], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 4, 1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 4, 2], dtype='float32') x_i = np.array([[0.9383, 0.1983, 3.2, 1.2]]).astype('float32') y_i = np.array([[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]]).astype( 'float32' @@ -161,8 +161,8 @@ class TestWhereAPI(unittest.TestCase): main_program = Program() with fluid.program_guard(main_program): cond_shape = [2, 4] - cond = fluid.layers.data( - name='cond', shape=cond_shape, dtype='bool' + cond = paddle.static.data( + name='cond', shape=[-1] + cond_shape, dtype='bool' ) x_data = 1.0 y_data = 2.0 @@ -185,11 +185,11 @@ class TestWhereAPI(unittest.TestCase): paddle.enable_static() main_program = Program() with fluid.program_guard(main_program): - cond = fluid.layers.data( - name='cond', shape=cond_shape, dtype='bool' + cond = paddle.static.data( + name='cond', shape=[-1] + cond_shape, dtype='bool' ) - x = fluid.layers.data(name='x', shape=x_shape, dtype='float32') - y = fluid.layers.data(name='y', shape=y_shape, dtype='float32') + x = paddle.static.data(name='x', shape=[-1] + x_shape, dtype='float32') + y = paddle.static.data(name='y', shape=[-1] + y_shape, dtype='float32') cond_data_tmp = np.random.random(size=cond_shape).astype('float32') cond_data = cond_data_tmp < 0.3 x_data = np.random.random(size=x_shape).astype('float32') @@ -340,7 +340,7 @@ class TestWhereDygraphAPI(unittest.TestCase): 
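Note: the hunks above and below all apply one migration pattern. fluid.layers.data, which prepended an implicit batch dimension unless append_batch_size=False was passed, becomes paddle.static.data with the batch dimension written explicitly as -1, and the collective tests additionally call desc.set_need_check_feed(False) on the new variable. A minimal sketch of the pattern, with an illustrative variable name that is not part of the patch:

import paddle

paddle.enable_static()

# Old style removed by this patch (implicit batch dimension unless
# append_batch_size=False was given):
#     x = fluid.layers.data(name='x', shape=[10, 1000], dtype='float32')

# New style added by this patch: the batch dimension is written explicitly.
x = paddle.static.data(name='x', shape=[-1, 10, 1000], dtype='float32')

# The collective runner tests above also clear the need-check-feed flag on
# the underlying variable desc, so the fed array is not re-checked against
# the declared shape.
x.desc.set_need_check_feed(False)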
def test_where_condition(self): data = np.array([[True, False], [False, True]]) with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[(-1), 2]) + x = paddle.static.data(name='x', shape=[(-1), 2]) y = paddle.where(x) self.assertEqual(type(y), tuple) self.assertEqual(len(y), 2) @@ -353,7 +353,7 @@ class TestWhereDygraphAPI(unittest.TestCase): np.testing.assert_allclose(expect_out, np.array(res)) data = np.array([True, True, False]) with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[(-1)]) + x = paddle.static.data(name='x', shape=[(-1)]) y = paddle.where(x) self.assertEqual(type(y), tuple) self.assertEqual(len(y), 1) @@ -379,9 +379,9 @@ class TestWhereOpError(unittest.TestCase): self.assertRaises(TypeError, test_Variable) def test_type(): - x = fluid.layers.data(name='x', shape=[4], dtype='bool') - y = fluid.layers.data(name='y', shape=[4], dtype='float16') - cond = fluid.layers.data(name='cond', shape=[4], dtype='int32') + x = paddle.static.data(name='x', shape=[-1, 4], dtype='bool') + y = paddle.static.data(name='y', shape=[-1, 4], dtype='float16') + cond = paddle.static.data(name='cond', shape=[-1, 4], dtype='int32') paddle.where(cond, x, y) self.assertRaises(TypeError, test_type) diff --git a/python/paddle/fluid/tests/unittests/npu/collective_identity_op_npu.py b/python/paddle/fluid/tests/unittests/npu/collective_identity_op_npu.py index f8f8612ca89e32ceee64e1f1ab7acd69c80612d3..c0a871e51f6737c603e976aeba30cd32b192cda7 100644 --- a/python/paddle/fluid/tests/unittests/npu/collective_identity_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/collective_identity_op_npu.py @@ -42,8 +42,8 @@ class TestCollectiveIdentity(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) toutdata = main_prog.current_block().create_var( name="outofgather", diff --git a/python/paddle/fluid/tests/unittests/npu/sync_batch_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/sync_batch_norm_op_npu.py index 72c822ad3a58e5e9f80ac811749c335b998855b0..0d0b469881894541bae31fc9805d4de099f520e7 100644 --- a/python/paddle/fluid/tests/unittests/npu/sync_batch_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/sync_batch_norm_op_npu.py @@ -74,11 +74,10 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase): use_cudnn = False with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - data = fluid.layers.data( + data = paddle.static.data( name='input', - shape=self.dshape, + shape=[-1] + self.dshape, dtype=self.dtype, - append_batch_size=False, ) conv = paddle.static.nn.conv2d( input=data, diff --git a/python/paddle/fluid/tests/unittests/npu/test_atan_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_atan_op_npu.py index fe22453f4fa45f71912681676fbea290245b6804..c6fa750a17b7a8a7bb6b30b075576ff60eb519d2 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_atan_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_atan_op_npu.py @@ -52,7 +52,7 @@ class TestAtan(OpTest): def test_out_name(self): with fluid.program_guard(fluid.Program()): np_x = np.array([0.1]) - data = fluid.layers.data(name="X", shape=[1]) + data = paddle.static.data(name="X", shape=[1]) out = paddle.atan(data, name='Y') place = paddle.NPUPlace(0) exe = fluid.Executor(place) diff --git 
a/python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py index 122429a7f8454cd687b90b3e503b531727d478f0..37dcf8465bc2193367810d3aec791a22657fb138 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py @@ -122,7 +122,7 @@ class TestClipOpError(unittest.TestCase): self.assertRaises(TypeError, test_Variable) def test_dtype(): - x2 = fluid.layers.data(name='x2', shape=[1], dtype='int32') + x2 = paddle.static.data(name='x2', shape=[-1, 1], dtype='int32') paddle.clip(x=x2, min=-1.0, max=1.0) self.assertRaises(TypeError, test_dtype) diff --git a/python/paddle/fluid/tests/unittests/npu/test_compare_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_compare_op_npu.py index 4fd0be14c12b54b22178543eb5dddf8df481dfb5..6caa442fc05357b542c13d7f7612b79a4e7b3484 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_compare_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_compare_op_npu.py @@ -44,9 +44,9 @@ def create_test_class(op_type, typename, callback): def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): - a = fluid.layers.data(name='a', shape=[2], dtype='float32') - b = fluid.layers.data(name='b', shape=[2], dtype='float32') - c = fluid.layers.data(name='c', shape=[2], dtype='int16') + a = paddle.static.data(name='a', shape=[-1, 2], dtype='float32') + b = paddle.static.data(name='b', shape=[-1, 2], dtype='float32') + c = paddle.static.data(name='c', shape=[-1, 2], dtype='int16') d = fluid.create_lod_tensor(np.array([[-1]]), [[1]], self.place) op = eval("fluid.layers.%s" % self.op_type) @@ -134,8 +134,8 @@ def create_test_class(op_type, typename, callback): def test_attr_name(self): paddle.enable_static() with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[4], dtype=typename) - y = fluid.layers.data(name='y', shape=[4], dtype=typename) + x = paddle.static.data(name='x', shape=[-1, 4], dtype=typename) + y = paddle.static.data(name='y', shape=[-1, 4], dtype=typename) op = eval("paddle.%s" % (self.op_type)) out = op(x=x, y=y, name="name_%s" % (self.op_type)) self.assertEqual("name_%s" % (self.op_type) in out.name, True) diff --git a/python/paddle/fluid/tests/unittests/npu/test_conv2d_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_conv2d_transpose_op_npu.py index aae34ebfb5be22c44f5ce3c56c4fcb65b7269a61..b2f966629dad38001ebb23298b73d886ac6a2318 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_conv2d_transpose_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_conv2d_transpose_op_npu.py @@ -429,11 +429,11 @@ class TestWithEvenUpsample_NHWC_output_padding_FP16( class TestConv2DTransposeAPI(unittest.TestCase): def test_case1(self): - data1 = fluid.layers.data( - name='data1', shape=[3, 5, 5], dtype='float32' + data1 = paddle.static.data( + name='data1', shape=[-1, 3, 5, 5], dtype='float32' ) - data2 = fluid.layers.data( - name='data2', shape=[5, 5, 3], dtype='float32' + data2 = paddle.static.data( + name='data2', shape=[-1, 5, 5, 3], dtype='float32' ) out1 = paddle.static.nn.conv2d_transpose( input=data1, diff --git a/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py index 65ec9a489f4a532b4d79755aed10e688b9eee51f..cdb7def1c242737bcdaacf2d5c526c1e5942899f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py +++ 
b/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py @@ -350,17 +350,15 @@ class TestCase1_AsyPadding(TestConv3DOp_2): class TestConv3DAPI(unittest.TestCase): def test_api(self): - input_NDHWC = fluid.layers.data( + input_NDHWC = paddle.static.data( name="input_NDHWC", shape=[2, 5, 5, 5, 3], - append_batch_size=False, dtype="float32", ) - input_NCDHW = fluid.layers.data( + input_NCDHW = paddle.static.data( name="input_NCDHW", shape=[2, 3, 5, 5, 3], - append_batch_size=False, dtype="float32", ) @@ -433,10 +431,9 @@ class TestConv3DAPI(unittest.TestCase): class TestConv3DAPI_Error(unittest.TestCase): def test_api(self): - input = fluid.layers.data( + input = paddle.static.data( name="input", shape=[2, 5, 5, 5, 4], - append_batch_size=False, dtype="float32", ) @@ -519,10 +516,9 @@ class TestConv3DAPI_Error(unittest.TestCase): self.assertRaises(ValueError, run_5) # ValueError: channel dimmention - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[2, 5, 5, 5, -1], - append_batch_size=False, dtype="float32", ) diff --git a/python/paddle/fluid/tests/unittests/npu/test_expand_as_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_expand_as_v2_op_npu.py index ca2b0195b4280e2c739f51339e3d87115e464673..9625f72d8cff816062e3bd28de006e7e80d7f8a9 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_expand_as_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_expand_as_v2_op_npu.py @@ -122,14 +122,13 @@ class TestExpandAsV2API(unittest.TestCase): def test_api(self): input1 = np.random.random([12, 14]).astype("float32") input2 = np.random.random([2, 12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32" + x = paddle.static.data( + name='x', shape=[12, 14], dtype="float32" ) - y = fluid.layers.data( + y = paddle.static.data( name='target_tensor', shape=[2, 12, 14], - append_batch_size=False, dtype="float32", ) diff --git a/python/paddle/fluid/tests/unittests/npu/test_expand_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_expand_v2_op_npu.py index 0ea52f04d9d7693465de77209f20049da59a98a6..2176f67047e8b4ccd88924265de69b9eb09712e3 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_expand_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_expand_v2_op_npu.py @@ -252,9 +252,9 @@ class TestExpandV2Error(unittest.TestCase): ) shape = [2, 2] self.assertRaises(TypeError, paddle.tensor.expand, x1, shape) - x2 = fluid.layers.data(name='x2', shape=[2], dtype="uint8") + x2 = paddle.static.data(name='x2', shape=[-1, 2], dtype="uint8") self.assertRaises(TypeError, paddle.tensor.expand, x2, shape) - x3 = fluid.layers.data(name='x3', shape=[2], dtype="bool") + x3 = paddle.static.data(name='x3', shape=[-1, 2], dtype="bool") x3.stop_gradient = False self.assertRaises(ValueError, paddle.tensor.expand, x3, shape) @@ -264,18 +264,16 @@ class TestExpandV2API(unittest.TestCase): def test_static(self): with fluid.program_guard(fluid.Program(), fluid.Program()): input = np.random.random([12, 14]).astype("float32") - x = fluid.layers.data( + x = paddle.static.data( name='x', shape=[12, 14], - append_batch_size=False, dtype="float32", ) positive_2 = fluid.layers.fill_constant([1], "int32", 12) - expand_shape = fluid.layers.data( + expand_shape = paddle.static.data( name="expand_shape", shape=[2], - append_batch_size=False, dtype="int32", ) diff --git a/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py index 
a6fa001076c2a53c7f6ed842c9fd04ffc5036d52..3ce9042d75fbda9a7a8b521d076efb0250ccb78f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py @@ -84,8 +84,8 @@ class TestCase1(TestGatherOp): class API_TestGather(unittest.TestCase): def test_out1(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.layers.data('data1', shape=[-1, 2], dtype='float32') - index = fluid.layers.data('index', shape=[-1, 1], dtype='int32') + data1 = paddle.static.data('data1', shape=[-1, 2], dtype='float32') + index = paddle.static.data('index', shape=[-1, 1], dtype='int32') out = paddle.gather(data1, index) place = paddle.NPUPlace(0) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py index 7f95e2b55c66df89ffefc9dfa151eb06428f3bd7..56430ee7c13aacec87d0acc2731b5f16c53fcd0a 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_group_norm_op_npu.py @@ -57,8 +57,8 @@ class TestGroupNormOpError(unittest.TestCase): self.assertRaises(TypeError, test_x_type) def test_x_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[2, 100, 3, 5], dtype='int32' + x2 = paddle.static.data( + name='x2', shape=[-1, 2, 100, 3, 5], dtype='int32' ) groups = 2 paddle.static.nn.group_norm(x2, groups) diff --git a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py index b500b44e7e57cc0812b9572f005fa61e8c8e27e9..440f681b018a74c102951117822fa7a64432221f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py @@ -108,8 +108,8 @@ class TestMomentumV2(unittest.TestCase): place = fluid.NPUPlace(0) main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) @@ -236,8 +236,8 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase): place = fluid.NPUPlace(0) main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) cost =paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py index 4ccd33134bed7e5b2036baf6ce7f37dbb8ce2726..d50c6dea782419c6b57fed9829b9ae7a7b95715f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_one_hot_v2_op_npu.py @@ -224,7 +224,7 @@ class TestOneHotOpApi(unittest.TestCase): ) def _run(self, depth): - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = 
paddle.static.data(name="label", shape=[-1, 1], dtype="int64") one_hot_label = fluid.one_hot(input=label, depth=depth) place = fluid.NPUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/npu/test_sin_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_sin_op_npu.py index 971184956e6d8bb81d48cd5544af02ec54971cb0..b441efd013f59276b0d4452164158c7b12c3cbb5 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sin_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sin_op_npu.py @@ -55,7 +55,7 @@ def test_class(op_type, typename): def test_out_name(self): with fluid.program_guard(fluid.Program()): np_x = np.array([0.1]) - data = fluid.layers.data(name="X", shape=[1]) + data = paddle.static.data(name="X", shape=[-1, 1]) out = eval("paddle.%s(data, name='Y')" % self.op_type) place = fluid.NPUPlace(0) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/npu/test_split_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_split_op_npu.py index d6a6cf9d573232aed6f3e5a8e657bb666fd4c875..38cc028760a3d2a1bf03e10b12b1fce0e25e4f3c 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_split_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_split_op_npu.py @@ -100,7 +100,7 @@ class TestCase5(TestCase1): class API_TestSplit(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.layers.data('data', shape=[-1, 10], dtype='float32') + data = paddle.static.data('data', shape=[-1, 10], dtype='float32') x0, x1 = paddle.split(data, num_or_sections=(3, 7), axis=1) place = fluid.NPUPlace(0) exe = fluid.Executor(place) @@ -114,7 +114,7 @@ class API_TestSplit(unittest.TestCase): class API_TestSplit2(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.layers.data('data', shape=[-1, 10], dtype='float32') + data = paddle.static.data('data', shape=[-1, 10], dtype='float32') x0, x1 = paddle.split(data, num_or_sections=2, axis=1) place = fluid.NPUPlace(0) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py index d14b84b645e74c5887f9ff7e6f88647f1bd139c7..1e185195d140adf83478d77d4a2d27ecf06d4d0a 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_stack_op_npu.py @@ -196,9 +196,9 @@ class TestTensorStackAPIWithLoDTensorArray(unittest.TestCase): class API_test(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.layers.data('data1', shape=[1, 2], dtype='float32') - data2 = fluid.layers.data('data2', shape=[1, 2], dtype='float32') - data3 = fluid.layers.data('data3', shape=[1, 2], dtype='float32') + data1 = paddle.static.data('data1', shape=[-1, 1, 2], dtype='float32') + data2 = paddle.static.data('data2', shape=[-1, 1, 2], dtype='float32') + data3 = paddle.static.data('data3', shape=[-1, 1, 2], dtype='float32') result_stack = paddle.stack([data1, data2, data3], axis=0) place = paddle.NPUPlace(0) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/npu/test_strided_slice_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_strided_slice_op_npu.py index c4470b101d18435919fcc6b9a3038c7e5df544bb..1bee4627e16ac11680466372ea516cabdb265199 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_strided_slice_op_npu.py +++ 
b/python/paddle/fluid/tests/unittests/npu/test_strided_slice_op_npu.py @@ -580,20 +580,19 @@ class TestStridedSliceAPI(unittest.TestCase): input = np.random.random([3, 4, 5, 6]).astype("float64") minus_1 = fluid.layers.fill_constant([1], "int32", -1) minus_3 = fluid.layers.fill_constant([1], "int32", -3) - starts = fluid.layers.data( - name='starts', shape=[3], dtype='int32', append_batch_size=False + starts = paddle.static.data( + name='starts', shape=[3], dtype='int32' ) - ends = fluid.layers.data( - name='ends', shape=[3], dtype='int32', append_batch_size=False + ends = paddle.static.data( + name='ends', shape=[3], dtype='int32' ) - strides = fluid.layers.data( - name='strides', shape=[3], dtype='int32', append_batch_size=False + strides = paddle.static.data( + name='strides', shape=[3], dtype='int32' ) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[3, 4, 5, 6], - append_batch_size=False, dtype="float64", ) out_1 = paddle.strided_slice( diff --git a/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_extra.py b/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_extra.py index 5c8d00b8fc272aca00b92be60ed9cee001dfc909..9cd59bb8a5c75b2d7f47b0800f0e34a527451ffe 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_extra.py +++ b/python/paddle/fluid/tests/unittests/npu/test_sync_batch_norm_op_npu_extra.py @@ -46,7 +46,7 @@ class TestDygraphSyncBatchNormAPIError(unittest.TestCase): # the input dtype of SyncBatchNorm must be float16 or float32 # float16 only can be set on GPU place and NPU place - x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") + x2 = paddle.static.data(name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32") self.assertRaises(TypeError, my_sync_batch_norm, x2) diff --git a/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py b/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py index 315f475a5c5bfd28699041c5bd6de919e0e07a22..a2bb351d6549b3253dacad5259a336a1b18d8f1a 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_where_index_npu.py @@ -97,7 +97,7 @@ class TestRank3(TestWhereIndexOp): class TestWhereOpError(unittest.TestCase): def test_api(self): with program_guard(Program(), Program()): - cond = fluid.layers.data(name='cond', shape=[4], dtype='bool') + cond = paddle.static.data(name='cond', shape=[-1, 4], dtype='bool') result = paddle.nonzero(cond) exe = fluid.Executor(paddle.NPUPlace(0)) diff --git a/python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py index 9e1126d0aa0436204f28e904f3e8f4cd439dfc77..5e9baa696fc368fffb85abe8f8b8643cd0c84008 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py @@ -133,8 +133,8 @@ class TestNPUWhereAPI(unittest.TestCase): train_prog = fluid.Program() startup = fluid.Program() with fluid.program_guard(train_prog, startup): - x = fluid.layers.data(name='x', shape=[4, 1], dtype='float32') - y = fluid.layers.data(name='y', shape=[4, 2], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 4, 1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 4, 2], dtype='float32') x_i = np.array([[0.9383, 0.1983, 3.2, 1.2]]).astype("float32") y_i = np.array([[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]]).astype( "float32" diff --git 
a/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py index 8314f836c7cc03afae360cc0d0bfb751c7118343..a8bcfeae68d3f618b2480734c7e16e0bb3b5f414 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_while_op_npu.py @@ -27,14 +27,14 @@ paddle.enable_static() class TestWhileOp(unittest.TestCase): def simple_net(self): - d0 = layers.data( - "d0", shape=[10], append_batch_size=False, dtype='float32' + d0 = paddle.static.data( + "d0", shape=[10], dtype='float32' ) - d1 = layers.data( - "d1", shape=[10], append_batch_size=False, dtype='float32' + d1 = paddle.static.data( + "d1", shape=[10], dtype='float32' ) - d2 = layers.data( - "d2", shape=[10], append_batch_size=False, dtype='float32' + d2 = paddle.static.data( + "d2", shape=[10], dtype='float32' ) # fill_constant npu op doesn't support int64 i = layers.zeros(shape=[1], dtype='int32') diff --git a/python/paddle/fluid/tests/unittests/prim/prim/CMakeLists.txt b/python/paddle/fluid/tests/unittests/prim/prim/CMakeLists.txt index db4822bce3f91bfdfff8dfeeedb7ba1ae0ba45be..80c5c8fe1538f8e378f1d3b0f9f37eeeba1fcbb8 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/prim/prim/CMakeLists.txt @@ -9,3 +9,4 @@ foreach(TEST_OP ${TEST_OPS}) endforeach() add_subdirectory(vjp) +add_subdirectory(flags) diff --git a/python/paddle/fluid/tests/unittests/prim/prim/flags/CMakeLists.txt b/python/paddle/fluid/tests/unittests/prim/prim/flags/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..72c6bbd7d05e8fdf99fce350ad15c216dcac5c92 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/prim/prim/flags/CMakeLists.txt @@ -0,0 +1,9 @@ +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +foreach(TEST_OP ${TEST_OPS}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS}) +endforeach() diff --git a/python/paddle/fluid/tests/unittests/prim/prim/flags/test_prim_flags.py b/python/paddle/fluid/tests/unittests/prim/prim/flags/test_prim_flags.py new file mode 100644 index 0000000000000000000000000000000000000000..8f3053af919e926c274a0f5d5bfba7d75ed12805 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/prim/prim/flags/test_prim_flags.py @@ -0,0 +1,52 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import unittest + +from paddle.fluid import core + + +class TestPrimFlags(unittest.TestCase): + def test_prim_flags(self): + self.assertFalse(core._is_bwd_prim_enabled()) + self.assertFalse(core._is_fwd_prim_enabled()) + + os.environ['FLAGS_prim_backward'] = "True" + core.check_and_set_prim_all_enabled() + self.assertTrue(core._is_bwd_prim_enabled()) + os.environ['FLAGS_prim_forward'] = "True" + core.check_and_set_prim_all_enabled() + self.assertTrue(core._is_fwd_prim_enabled()) + os.environ['FLAGS_prim_all'] = "False" + core.check_and_set_prim_all_enabled() + self.assertFalse(core._is_bwd_prim_enabled()) + self.assertFalse(core._is_fwd_prim_enabled()) + + os.environ['FLAGS_prim_all'] = "True" + core.check_and_set_prim_all_enabled() + self.assertTrue(core._is_bwd_prim_enabled()) + self.assertTrue(core._is_fwd_prim_enabled()) + + del os.environ['FLAGS_prim_all'] + os.environ['FLAGS_prim_backward'] = "False" + core.check_and_set_prim_all_enabled() + self.assertFalse(core._is_bwd_prim_enabled()) + os.environ['FLAGS_prim_forward'] = "False" + core.check_and_set_prim_all_enabled() + self.assertFalse(core._is_fwd_prim_enabled()) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_add_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_add_grad.py index e05fef3b18d129bb702e82129131aef645cf02bb..b5a183add8cd0a3191dc670a5cf710eabb5351ec 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_add_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_add_grad.py @@ -20,7 +20,7 @@ import parameterized as param import paddle from paddle.fluid import core -core.set_prim_enabled(True) +core._set_prim_backward_enabled(True) @param.parameterized_class( @@ -67,7 +67,7 @@ class TestTanhGradComp(unittest.TestCase): def test_tanh_grad_comp(self): def actual(primal0, primal1): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) paddle.disable_static() x = paddle.to_tensor(primal0, dtype='float32', stop_gradient=False) y = paddle.to_tensor(primal1, dtype='float32', stop_gradient=False) @@ -78,7 +78,7 @@ class TestTanhGradComp(unittest.TestCase): return res[0].numpy(), res[1].numpy() def desired(primal0, primal1): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) paddle.disable_static() x = paddle.to_tensor(primal0, dtype='float32', stop_gradient=False) y = paddle.to_tensor(primal1, dtype='float32', stop_gradient=False) @@ -104,7 +104,7 @@ class TestTanhGradComp(unittest.TestCase): rtol=1e-6, atol=0, ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_div_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_div_grad.py index c9ae5cd7ecbafd7f554dc24a38a40b0a42eec854..96e186e32e91041afd734e3e3b6109f1d762f766 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_div_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_div_grad.py @@ -20,7 +20,7 @@ import parameterized as param import paddle from paddle.fluid import core -core.set_prim_enabled(True) +core._set_prim_backward_enabled(True) @param.parameterized_class( @@ -67,7 +67,7 @@ class TestTanhGradComp(unittest.TestCase): def test_tanh_grad_comp(self): def actual(primal0, primal1): - core.set_prim_enabled(True) + 
core._set_prim_backward_enabled(True) paddle.disable_static() x = paddle.to_tensor(primal0, dtype='float32', stop_gradient=False) y = paddle.to_tensor(primal1, dtype='float32', stop_gradient=False) @@ -78,7 +78,7 @@ class TestTanhGradComp(unittest.TestCase): return res[0].numpy(), res[1].numpy() def desired(primal0, primal1): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) paddle.disable_static() x = paddle.to_tensor(primal0, dtype='float32', stop_gradient=False) y = paddle.to_tensor(primal1, dtype='float32', stop_gradient=False) @@ -104,7 +104,7 @@ class TestTanhGradComp(unittest.TestCase): rtol=1e-6, atol=0, ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_exp_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_exp_grad.py index e81314ba041ef7f91f0bdb4f1c266d4bcc92bb72..85974031280820dd0c056b8101340b3d344dc83e 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_exp_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_exp_grad.py @@ -32,14 +32,14 @@ from paddle.fluid import core class TestExpGradComp(unittest.TestCase): @classmethod def setUpClass(cls): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) cls.primal = cls.primal.astype(cls.dtype) if cls.cotangent is not None: cls.cotangent = cls.cotangent.astype(cls.dtype) @classmethod def tearDownClass(cls): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) def test_exp_grad_comp(self): def actual(primal, cotangent): diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_expand_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_expand_grad.py index c4de565dc504f73c9e505d091c4e05ec06798d57..92b0b98942caaff065ce513aa0aee11550bf7ab7 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_expand_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_expand_grad.py @@ -62,7 +62,7 @@ class TestExpandGradComp(unittest.TestCase): @classmethod def tearDownClass(cls): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) def test_comp(self): def func(primal, cotangent, shape): @@ -74,11 +74,11 @@ class TestExpandGradComp(unittest.TestCase): ] def actual(primal, cotangent, shape): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) return func(primal, cotangent, shape) def desired(primal, cotangent, shape): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) return func(primal, cotangent, shape) np.testing.assert_allclose( diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_multiply_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_multiply_grad.py index 59daf91ab8b84b391e971ae6c28b75ea7e05b89f..fdef2779a41b8a05fee7159e9190989095b93f36 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_multiply_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_multiply_grad.py @@ -81,10 +81,10 @@ class TestMultiplyGradComp(unittest.TestCase): return [g for g in grads if g is not None] def test_comp(self): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) actual = self.vjp() - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) 
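Note: the prim/vjp test edits above and below are a mechanical rename of core.set_prim_enabled to core._set_prim_backward_enabled, which toggles only the composite backward (vjp) rules; the new flags test added above additionally covers core.check_and_set_prim_all_enabled reading the FLAGS_prim_forward, FLAGS_prim_backward and FLAGS_prim_all environment variables. A minimal sketch of the actual-versus-desired comparison these tests run, using tanh and illustrative names that are not part of the patch:

import numpy as np
import paddle
from paddle.fluid import core

paddle.disable_static()
primal = np.random.rand(2, 3).astype('float32')
cotangent = np.ones_like(primal)

def tanh_vjp(use_prim):
    # Toggle the composite backward rules before building the graph.
    core._set_prim_backward_enabled(use_prim)
    x = paddle.to_tensor(primal, stop_gradient=False)
    v = paddle.to_tensor(cotangent)
    y = paddle.tanh(x)
    return paddle.grad(y, x, v)[0].numpy()

# actual: composite (prim) backward; desired: the original grad kernel.
np.testing.assert_allclose(tanh_vjp(True), tanh_vjp(False), rtol=1e-6, atol=0)
core._set_prim_backward_enabled(False)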
desired = self.vjp() for i, j in zip(actual, desired): diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sqrt_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sqrt_grad.py index 7abb91e912ac4fa74cd5b8d4ca1e9d59d4ca219b..a97cb37420145f87aab2036495faa1bedf125015 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sqrt_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sqrt_grad.py @@ -22,7 +22,7 @@ import parameterized as param import paddle from paddle.fluid import core -core.set_prim_enabled(True) +core._set_prim_backward_enabled(True) @param.parameterized_class( @@ -63,7 +63,7 @@ class TestSqrtGradComp(unittest.TestCase): rtol=1e-6, atol=0, ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sub_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sub_grad.py index e508ae63803cb121e6cdce099c178b3cce28c9c2..2a6e758ba42cf3575ee6819ed9ed4d6aec850a88 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sub_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sub_grad.py @@ -20,7 +20,7 @@ import parameterized as param import paddle from paddle.fluid import core -core.set_prim_enabled(True) +core._set_prim_backward_enabled(True) @param.parameterized_class( @@ -67,7 +67,7 @@ class TestTanhGradComp(unittest.TestCase): def test_tanh_grad_comp(self): def actual(primal0, primal1): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) paddle.disable_static() x = paddle.to_tensor(primal0, dtype='float32', stop_gradient=False) y = paddle.to_tensor(primal1, dtype='float32', stop_gradient=False) @@ -78,7 +78,7 @@ class TestTanhGradComp(unittest.TestCase): return res[0].numpy(), res[1].numpy() def desired(primal0, primal1): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) paddle.disable_static() x = paddle.to_tensor(primal0, dtype='float32', stop_gradient=False) y = paddle.to_tensor(primal1, dtype='float32', stop_gradient=False) @@ -104,7 +104,7 @@ class TestTanhGradComp(unittest.TestCase): rtol=1e-6, atol=0, ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sum_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sum_grad.py index 5586f7c0ccaf64bd924b30e6053ac9f2932bab30..e7f8b23542e6b8f562796e25b50bf5f9e68bc3a4 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sum_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_sum_grad.py @@ -21,7 +21,7 @@ from paddle.fluid import core def actual(primal, cotangent, axis, keep_dim): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) x = paddle.to_tensor(primal, dtype='float32', stop_gradient=False) v = paddle.to_tensor(cotangent, dtype='float32', stop_gradient=False) y = paddle.sum(x, axis=axis, keepdim=keep_dim) @@ -30,7 +30,7 @@ def actual(primal, cotangent, axis, keep_dim): def desired(primal, cotangent, axis, keep_dim): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) x = paddle.to_tensor(primal, dtype='float32', stop_gradient=False) v = paddle.to_tensor(cotangent, dtype='float32', 
stop_gradient=False) y = paddle.sum(x, axis=axis, keepdim=keep_dim) diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_tanh_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_tanh_grad.py index 438f71b573a717efde1def08a118e9fbf1fbfa81..11cc010b2ee130880bff5e4a9bbeccc10c51f057 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_tanh_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/eager/test_comp_eager_tanh_grad.py @@ -20,7 +20,7 @@ import parameterized as param import paddle from paddle.fluid import core -core.set_prim_enabled(True) +core._set_prim_backward_enabled(True) @param.parameterized_class( @@ -74,7 +74,7 @@ class TestTanhGradComp(unittest.TestCase): rtol=1e-6, atol=0, ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py index b7d7969d9aa0469d98e8d460c25bc17058235648..1673ff083e7cf4081b300ab7de6e585e7b7d1c21 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py @@ -81,7 +81,7 @@ class TestAddGradComp(unittest.TestCase): self.x.stop_gradient = False self.y.stop_gradient = False net = PrimeNet() - core.set_prim_enabled(use_prim) + core._set_prim_backward_enabled(use_prim) net = apply_to_static(net, use_cinn) out = net(self.x, self.y) res = paddle.autograd.grad(out, [self.x, self.y]) @@ -104,7 +104,7 @@ class TestAddGradComp(unittest.TestCase): def test_tanh_grad_comp(self): def actual(primal0, primal1): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) mp, sp = paddle.static.Program(), paddle.static.Program() with paddle.static.program_guard(mp, sp): x = paddle.static.data('primal0', primal0.shape, primal0.dtype) @@ -126,7 +126,7 @@ class TestAddGradComp(unittest.TestCase): return out[0], out[1] def desired(primal0, primal1): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) mp, sp = paddle.static.Program(), paddle.static.Program() with paddle.static.program_guard(mp, sp): x = paddle.static.data( @@ -167,7 +167,7 @@ class TestAddGradComp(unittest.TestCase): rtol=1e-6, atol=0, ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py index 45cae351a73ebb98d93e526efc26e148a96ef764..5dd7417130bc1137b751ea420384d63350c216b0 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py @@ -82,7 +82,7 @@ class TestDivGradComp(unittest.TestCase): self.x.stop_gradient = False self.y.stop_gradient = False net = PrimeNet() - core.set_prim_enabled(use_prim) + core._set_prim_backward_enabled(use_prim) net = apply_to_static(net, use_cinn) out = net(self.x, self.y) res = paddle.autograd.grad(out, [self.x, self.y]) @@ -107,7 +107,7 @@ class TestDivGradComp(unittest.TestCase): paddle.enable_static() def actual(primal0, primal1): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) mp, sp = paddle.static.Program(), paddle.static.Program() with 
paddle.static.program_guard(mp, sp): x = paddle.static.data('primal0', primal0.shape, primal0.dtype) @@ -130,7 +130,7 @@ class TestDivGradComp(unittest.TestCase): return out[0], out[1] def desired(primal0, primal1): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) mp, sp = paddle.static.Program(), paddle.static.Program() with paddle.static.program_guard(mp, sp): x = paddle.static.data( @@ -172,7 +172,7 @@ class TestDivGradComp(unittest.TestCase): rtol=1e-6, atol=0, ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) paddle.disable_static() diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py index 1d675e8bd097968ed660f52de0c1f658803837c6..95d3c3027fd9d28e4b054806959c8ad8ec391e9a 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py @@ -81,7 +81,7 @@ class TestDivGradComp(unittest.TestCase): self.x.stop_gradient = False self.y.stop_gradient = False net = PrimeNet() - core.set_prim_enabled(use_prim) + core._set_prim_backward_enabled(use_prim) net = apply_to_static(net, use_cinn) out = net(self.x, self.y) res = paddle.autograd.grad(out, [self.x, self.y]) @@ -104,7 +104,7 @@ class TestDivGradComp(unittest.TestCase): def test_tanh_grad_comp(self): def actual(primal0, primal1): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) mp, sp = paddle.static.Program(), paddle.static.Program() with paddle.static.program_guard(mp, sp): x = paddle.static.data('primal0', primal0.shape, primal0.dtype) @@ -126,7 +126,7 @@ class TestDivGradComp(unittest.TestCase): return out[0], out[1] def desired(primal0, primal1): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) mp, sp = paddle.static.Program(), paddle.static.Program() with paddle.static.program_guard(mp, sp): x = paddle.static.data( @@ -167,7 +167,7 @@ class TestDivGradComp(unittest.TestCase): rtol=1e-6, atol=0, ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_exp_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_exp_grad.py index c1c76631232c007f4a2a81cb2227035910bb57d8..2e720f6934f5cd8975b8f4137b15de873cc06277 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_exp_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_exp_grad.py @@ -33,14 +33,14 @@ from paddle.fluid import core class TestExpGradComp(unittest.TestCase): @classmethod def setUpClass(cls): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) cls.primal = cls.primal.astype(cls.dtype) if cls.cotangent is not None: cls.cotangent = cls.cotangent.astype(cls.dtype) @classmethod def tearDownClass(cls): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) def setUp(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_expand_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_expand_grad.py index c322074d34d88715aa9aec84ca5ca6e05c88aba8..2772719a81820a9e46599b1a3881e10c5e015f95 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_expand_grad.py +++ 
b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_expand_grad.py @@ -71,7 +71,7 @@ class TestExpandGradComp(unittest.TestCase): @classmethod def tearDownClass(cls): paddle.disable_static() - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) def test_comp(self): def func(primal, cotangent, shape): @@ -93,11 +93,11 @@ class TestExpandGradComp(unittest.TestCase): )[0] def actual(primal, cotangent, shape): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) return func(primal, cotangent, shape) def desired(primal, cotangent, shape): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) return func(primal, cotangent, shape) np.testing.assert_allclose( diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_multiply_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_multiply_grad.py index 63e8a4f1bbf3451bed5c9402a40ffa13a0bbd319..2d1a10a6d4b5794d938b1685f03057ee9b63ca89 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_multiply_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_multiply_grad.py @@ -108,10 +108,10 @@ class TestMultiplyGradComp(unittest.TestCase): def test_comp(self): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) actual = self.vjp() - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) desired = self.vjp() self.assertEqual(len(actual), len(desired)) diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py index 505a4391138e95adb376924499cb95bc43fcb5cb..8df50c768c2b72e11d5de955b3b88e65183c0aad 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py @@ -16,7 +16,7 @@ import unittest from paddle.fluid import core -core.set_prim_enabled(True) +core._set_prim_backward_enabled(True) import autograd import autograd.numpy @@ -60,7 +60,7 @@ class TestSqrtGradComp(unittest.TestCase): self.x = paddle.randn([2, 4]) self.x.stop_gradient = False net = PrimeNet() - core.set_prim_enabled(use_prim) + core._set_prim_backward_enabled(use_prim) net = apply_to_static(net, use_cinn) out = net(self.x) res = paddle.autograd.grad(out, [self.x]) @@ -109,7 +109,7 @@ class TestSqrtGradComp(unittest.TestCase): rtol=1e-6, atol=0, ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py index f98a6af621f96f32e99f6d5f46afd5c297e6a528..693bf8b942bab23e9af6b10c5456b8e76936d38b 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py @@ -82,7 +82,7 @@ class TestDivGradComp(unittest.TestCase): self.x.stop_gradient = False self.y.stop_gradient = False net = PrimeNet() - core.set_prim_enabled(use_prim) + core._set_prim_backward_enabled(use_prim) net = apply_to_static(net, use_cinn) out = net(self.x, self.y) res = paddle.autograd.grad(out, [self.x, self.y]) @@ -105,7 +105,7 @@ class TestDivGradComp(unittest.TestCase): def test_tanh_grad_comp(self): def actual(primal0, primal1): - core.set_prim_enabled(True) + 
core._set_prim_backward_enabled(True) mp, sp = paddle.static.Program(), paddle.static.Program() with paddle.static.program_guard(mp, sp): x = paddle.static.data('primal0', primal0.shape, primal0.dtype) @@ -127,7 +127,7 @@ class TestDivGradComp(unittest.TestCase): return out[0], out[1] def desired(primal0, primal1): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) mp, sp = paddle.static.Program(), paddle.static.Program() with paddle.static.program_guard(mp, sp): x = paddle.static.data( @@ -168,7 +168,7 @@ class TestDivGradComp(unittest.TestCase): rtol=1e-6, atol=0, ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sum_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sum_grad.py index b9b2ad03913cb7127c63c1a57c0d1af5944cff2f..a6b12c7cf623c8d9dede05aa9c7d0fcc64549572 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sum_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sum_grad.py @@ -21,7 +21,7 @@ from paddle.fluid import core def actual(primal, cotangent, axis, keep_dim): - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) mp, sp = paddle.static.Program(), paddle.static.Program() with paddle.static.program_guard(mp, sp): x = paddle.static.data('primal', primal.shape, primal.dtype) @@ -40,7 +40,7 @@ def actual(primal, cotangent, axis, keep_dim): def desired(primal, cotangent, axis, keep_dim): - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) mp, sp = paddle.static.Program(), paddle.static.Program() with paddle.static.program_guard(mp, sp): x = paddle.static.data('primal', primal.shape, primal.dtype) diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py index c7c9109eeaab0403b87e74a0d9edea8ebe995e21..e643cf620a8118fb1c36202ada5543b60b3f0012 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py @@ -16,7 +16,7 @@ import unittest from paddle.fluid import core -core.set_prim_enabled(True) +core._set_prim_backward_enabled(True) import autograd import autograd.numpy @@ -60,7 +60,7 @@ class TestTanhGradComp(unittest.TestCase): self.x = paddle.randn([2, 4]) self.x.stop_gradient = False net = PrimeNet() - core.set_prim_enabled(use_prim) + core._set_prim_backward_enabled(use_prim) net = apply_to_static(net, use_cinn) out = net(self.x) res = paddle.autograd.grad(out, [self.x]) @@ -109,7 +109,7 @@ class TestTanhGradComp(unittest.TestCase): rtol=1e-6, atol=0, ) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/prim/test_comp_get_grad_op_desc_prim_disabled.py b/python/paddle/fluid/tests/unittests/prim/test_comp_get_grad_op_desc_prim_disabled.py index 3170313a0d845f383f8b8503ad9c0bc5782f5828..9292a1c4276d6b5b9087e4d52003519010a7a45f 100644 --- a/python/paddle/fluid/tests/unittests/prim/test_comp_get_grad_op_desc_prim_disabled.py +++ b/python/paddle/fluid/tests/unittests/prim/test_comp_get_grad_op_desc_prim_disabled.py @@ -17,7 +17,7 @@ import unittest from paddle.fluid import core -core.set_prim_enabled(False) +core._set_prim_backward_enabled(False) import parameterized as param diff --git 
a/python/paddle/fluid/tests/unittests/prim/test_comp_get_grad_op_desc_prim_enabled.py b/python/paddle/fluid/tests/unittests/prim/test_comp_get_grad_op_desc_prim_enabled.py index d76f99dc601063aca2e2e6160f4c68fa52280335..18b445f38da3a8ea74f671b52ac74d33624be54a 100644 --- a/python/paddle/fluid/tests/unittests/prim/test_comp_get_grad_op_desc_prim_enabled.py +++ b/python/paddle/fluid/tests/unittests/prim/test_comp_get_grad_op_desc_prim_enabled.py @@ -17,7 +17,7 @@ import unittest from paddle.fluid import core -core.set_prim_enabled(True) +core._set_prim_backward_enabled(True) import parameterized as param @@ -77,7 +77,7 @@ class TestGetGradOpDescPrimEnabled(unittest.TestCase): ) print(actual) self.assertEquals(actual, self.desired_ops) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py index cb262f9495f8fd81cb471e4f9f7082f0b5e8d1b1..bd18a205259a3958484f1f27426c8e8d6d7dfc0f 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py @@ -89,7 +89,9 @@ class TestSequenceConcatOpError(unittest.TestCase): def test_errors(self): def test_input_list(): # the input type must be list - x_data = fluid.layers.data(name='x', shape=[4], dtype='float32') + x_data = paddle.static.data( + name='x', shape=[-1, 4], dtype='float32' + ) fluid.layers.sequence_concat(input=x_data) self.assertRaises(TypeError, test_input_list) @@ -97,12 +99,16 @@ class TestSequenceConcatOpError(unittest.TestCase): def test_variable1(): # the input element type must be Variable x1_data = np.array([[3, 5]]).astype('float32') - y1_data = fluid.layers.data(name='y1', shape=[4], dtype='float32') + y1_data = paddle.static.data( + name='y1', shape=[-1, 4], dtype='float32' + ) fluid.layers.sequence_concat(input=[x1_data, y1_data]) def test_variable2(): x2_data = np.array([[3, 5]]).astype('float32') - y2_data = fluid.layers.data(name='y2', shape=[4], dtype='float32') + y2_data = paddle.static.data( + name='y2', shape=[-1, 4], dtype='float32' + ) fluid.layers.sequence_concat(input=[y2_data, x2_data]) for i in range(2): @@ -113,8 +119,12 @@ class TestSequenceConcatOpError(unittest.TestCase): def test_dtype(): # dtype must be 'float32', 'float64', 'int64' - x3_data = fluid.layers.data(name="x3", shape=[3, 5], dtype='int32') - y3_data = fluid.layers.data(name="y3", shape=[3, 5], dtype='int16') + x3_data = paddle.static.data( + name="x3", shape=[-1, 3, 5], dtype='int32' + ) + y3_data = paddle.static.data( + name="y3", shape=[-1, 3, 5], dtype='int16' + ) input_list = [x3_data, y3_data] fluid.layers.sequence_concat(input=input_list) diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_conv.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_conv.py index ec296d969c8492cf5a72a6f1ea61a3016d3c8caa..2376716b7ca5bdc4f1d435f3ac9d775ac01ba4fe 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_conv.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_conv.py @@ -18,6 +18,8 @@ import unittest import numpy as np +import paddle + sys.path.append("../") from op_test import OpTest @@ -283,7 +285,7 @@ class TestSeqConvApi(unittest.TestCase): def test_api(self): import paddle.fluid as fluid - x = fluid.layers.data('x', shape=[32], lod_level=1) + x = paddle.static.data('x', shape=[-1, 32], lod_level=1) y 
= fluid.layers.sequence_conv( input=x, num_filters=2, filter_size=3, padding_start=None ) diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_first_step.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_first_step.py index 85e225c896cb52d6b25ff73176375bc5839d637f..704462b040a51fdf247406afc327a31798b23c31 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_first_step.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_first_step.py @@ -16,6 +16,7 @@ import unittest import numpy as np +import paddle import paddle.fluid as fluid from paddle.fluid.framework import Program, program_guard @@ -35,10 +36,9 @@ class TestSequenceFirstStepOpError(unittest.TestCase): def test_input_dtype(): # the dtype of input must be int64 - type_data = fluid.layers.data( + type_data = paddle.static.data( name='type_data', shape=[7, 1], - append_batch_size=False, dtype='int64', lod_level=1, ) diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_last_step.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_last_step.py index e269b46517ecab33bab858c8501f455751c02997..165f941f58576fe540e2aed6cc4608f5178a4aeb 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_last_step.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_last_step.py @@ -16,6 +16,7 @@ import unittest import numpy as np +import paddle import paddle.fluid as fluid from paddle.fluid.framework import Program, program_guard @@ -35,10 +36,9 @@ class TestSequenceLastStepOpError(unittest.TestCase): def test_input_dtype(): # the dtype of input must be int64 - type_data = fluid.layers.data( + type_data = paddle.static.data( name='type_data', shape=[7, 1], - append_batch_size=False, dtype='int64', lod_level=1, ) diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_pad_op.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_pad_op.py index a3dd8373d98fc586fafb576090a7f3b78767de9a..421b4e96893558c7f18b37838ac7bf4f125aae26 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_pad_op.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_pad_op.py @@ -17,6 +17,8 @@ import unittest import numpy as np +import paddle + sys.path.append("../") from op_test import OpTest @@ -162,8 +164,8 @@ class TestSequencePadOpError(unittest.TestCase): self.assertRaises(TypeError, test_x_variable) def test_pad_value_variable(): - x1 = fluid.layers.data( - name='x1', shape=[10, 5], dtype='float32', lod_level=1 + x1 = paddle.static.data( + name='x1', shape=[-1, 10, 5], dtype='float32', lod_level=1 ) pad_value1 = np.array([0.0], dtype=np.float32) fluid.layers.sequence_pad(x=x1, pad_value=pad_value1) @@ -171,8 +173,8 @@ class TestSequencePadOpError(unittest.TestCase): self.assertRaises(TypeError, test_pad_value_variable) def test_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[10, 5], dtype='int16', lod_level=1 + x2 = paddle.static.data( + name='x2', shape=[-1, 10, 5], dtype='int16', lod_level=1 ) pad_value2 = fluid.layers.assign( input=np.array([0.0], dtype=np.int32) diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_reshape.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_reshape.py index 5cda07af7de90094948c8276c61c8f9481347e69..9f903251507eba0c3981a1fabb02a45a119bb9b0 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_reshape.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_reshape.py @@ -17,6 +17,8 @@ import 
unittest import numpy as np +import paddle + sys.path.append("../") from op_test import OpTest @@ -93,10 +95,9 @@ class TestSequenceReshapeOpError(unittest.TestCase): self.assertRaises(TypeError, test_variable) def test_dtype(): - x1 = fluid.layers.data( + x1 = paddle.static.data( name='x1', - shape=[2, 6], - append_batch_size=False, + shape=[-1, 2, 6], dtype='float16', lod_level=1, ) diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_reverse.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_reverse.py index 13897df00ab939c8478caa3accc97525b910e4be..f141806aac2339ee2fd7a78531e03d8bcbbb07af 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_reverse.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_reverse.py @@ -17,6 +17,7 @@ import unittest import numpy as np +import paddle import paddle.fluid as fluid sys.path.append("../") @@ -106,7 +107,9 @@ class TestSequenceReverseOpError(unittest.TestCase): def test_dtype(): # dtype must be 'float32', 'float64', 'int8', 'int32', 'int64' - x2_data = fluid.layers.data(name='x2', shape=[4], dtype='float16') + x2_data = paddle.static.data( + name='x2', shape=[-1, 4], dtype='float16' + ) fluid.layers.sequence_reverse(x=x2_data) self.assertRaises(TypeError, test_dtype) diff --git a/python/paddle/fluid/tests/unittests/seresnext_net.py b/python/paddle/fluid/tests/unittests/seresnext_net.py index beb24de94677e755a91ee1b0dbbb4989833be73f..6e1ad62dc13f5f04ffc3b6dd340b56d90bdec9b0 100644 --- a/python/paddle/fluid/tests/unittests/seresnext_net.py +++ b/python/paddle/fluid/tests/unittests/seresnext_net.py @@ -131,8 +131,10 @@ img_shape = [3, 224, 224] def SE_ResNeXt50Small(use_feed): - img = fluid.layers.data(name='image', shape=img_shape, dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data( + name='image', shape=[-1] + img_shape, dtype='float32' + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') conv = conv_bn_layer( input=img, num_filters=16, filter_size=3, stride=2, act='relu' diff --git a/python/paddle/fluid/tests/unittests/simple_nets.py b/python/paddle/fluid/tests/unittests/simple_nets.py index 2b41107061d07c3fb5fd1f0b5d29495f138973b6..a3ff2b686574477e8f12132c97c1c9e0e538cf5b 100644 --- a/python/paddle/fluid/tests/unittests/simple_nets.py +++ b/python/paddle/fluid/tests/unittests/simple_nets.py @@ -40,8 +40,8 @@ def simple_fc_net_with_inputs(img, label, class_num=10): def simple_fc_net(use_feed=None): - img = fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data(name='image', shape=[-1, 784], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') return simple_fc_net_with_inputs(img, label, class_num=10) @@ -70,8 +70,8 @@ def batchnorm_fc_with_inputs(img, label, class_num=10): def fc_with_batchnorm(use_feed=None): - img = fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data(name='image', shape=[-1, 784], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') return batchnorm_fc_with_inputs(img, label, class_num=10) @@ -89,10 +89,10 @@ def bow_net( This model is from https://github.com/PaddlePaddle/models: fluid/PaddleNLP/text_classification/nets.py """ - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", 
lod_level=1 + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 ) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") emb = fluid.layers.embedding( input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim] ) diff --git a/python/paddle/fluid/tests/unittests/test_accuracy_op.py b/python/paddle/fluid/tests/unittests/test_accuracy_op.py index 446240e9fb71da1d63187311f205e01442c62dfd..431d8b24bcee2f7581c25a542a71fc9cb1a6340f 100755 --- a/python/paddle/fluid/tests/unittests/test_accuracy_op.py +++ b/python/paddle/fluid/tests/unittests/test_accuracy_op.py @@ -66,16 +66,18 @@ class TestAccuracyOpError(unittest.TestCase): x1 = fluid.create_lod_tensor( np.array([[-1]]), [[1]], fluid.CPUPlace() ) - label = fluid.layers.data( + label = paddle.static.data( name='label', shape=[-1, 1], dtype="int32" ) self.assertRaises(TypeError, paddle.static.accuracy, x1, label) self.assertRaises(TypeError, paddle.metric.accuracy, x1, label) # The input dtype of accuracy_op must be float32 or float64. - x2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") + x2 = paddle.static.data(name='x2', shape=[-1, 4], dtype="int32") self.assertRaises(TypeError, paddle.static.accuracy, x2, label) self.assertRaises(TypeError, paddle.metric.accuracy, x2, label) - x3 = fluid.layers.data(name='input', shape=[-1, 2], dtype="float16") + x3 = paddle.static.data( + name='input', shape=[-1, 2], dtype="float16" + ) paddle.static.accuracy(input=x3, label=label) paddle.metric.accuracy(input=x3, label=label) diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py index 48349cfe910b35f35e1e19399c39bd40a0b63726..8333da1accfda292da640353f0f005bd29224873 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py @@ -21,7 +21,6 @@ from decorator_helper import prog_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers import paddle.nn.functional as F @@ -31,7 +30,7 @@ class TestSigmoidTripleGradCheck(unittest.TestCase): shape = [2, 3, 7, 9] eps = 0.0005 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = F.sigmoid(x) x_arr = np.random.random(shape).astype(dtype) @@ -58,7 +57,7 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase): shape = [2, 3, 7, 9] eps = 0.0005 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = F.sigmoid(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -88,7 +87,7 @@ class TestTanhTripleGradCheck(unittest.TestCase): shape = [2, 3, 7, 9] eps = 0.0005 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.tanh(x) x_arr = np.random.random(shape).astype(dtype) @@ -118,7 +117,7 @@ class TestTanhDoubleGradCheck(unittest.TestCase): shape = [2, 3, 7, 9] eps = 0.0005 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.tanh(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -148,7 +147,7 @@ class TestAbsDoubleGradCheck(unittest.TestCase): shape = [2, 3, 7, 9] eps = 0.0005 dtype = 
np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.abs(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -176,7 +175,7 @@ class TestReluDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) x.persistable = True y = F.relu(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -206,7 +205,7 @@ class TestLeakyReluDoubleGradCheck(unittest.TestCase): alpha = 0.2 dtype = np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) x.persistable = True y = paddle.nn.functional.leaky_relu(x, alpha) @@ -241,7 +240,7 @@ class TestELUDoubleGradCheck(unittest.TestCase): dtype = np.float64 SEED = 0 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) x.persistable = True y = paddle.nn.functional.elu(x, alpha=alpha) @@ -275,7 +274,7 @@ class TestCELUDoubleGradCheck(unittest.TestCase): dtype = np.float64 SEED = 0 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) x.persistable = True y = F.celu(x, alpha=alpha) @@ -307,7 +306,7 @@ class TestSqrtDoubleGradCheck(unittest.TestCase): eps = 0.0001 dtype = np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) x.persistable = True y = paddle.sqrt(x) @@ -339,7 +338,7 @@ class TestRsqrtDoubleGradCheck(unittest.TestCase): eps = 0.0001 dtype = np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) x.persistable = True y = paddle.rsqrt(x) @@ -372,7 +371,7 @@ class TestSquareDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) x.persistable = True y = paddle.square(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -403,7 +402,7 @@ class TestLogDoubleGradCheck(unittest.TestCase): eps = 1e-6 dtype = np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) x.persistable = True y = paddle.log(x) @@ -434,7 +433,7 @@ class TestSinDoubleGradCheck(unittest.TestCase): shape = [2, 3, 7, 9] eps = 0.0005 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.sin(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -464,7 +463,7 @@ class TestCosDoubleGradCheck(unittest.TestCase): shape = [2, 3, 7, 9] eps = 0.0005 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.cos(x) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -494,7 +493,7 @@ class TestPowDoubleGradCheck1(unittest.TestCase): shape = [2, 3, 7, 9] eps = 1e-6 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.pow(x, 2) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -523,7 +522,7 @@ class TestPowDoubleGradCheck2(unittest.TestCase): shape = [2, 3, 7, 9] eps = 1e-6 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.pow(x, 1) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -552,7 +551,7 @@ class TestSinTripleGradCheck(unittest.TestCase): shape = [2, 3, 7, 
9] eps = 0.0005 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.sin(x) x_arr = np.random.random(shape).astype(dtype) @@ -582,7 +581,7 @@ class TestPowTripleGradCheck1(unittest.TestCase): shape = [2, 3, 7, 9] eps = 1e-6 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.pow(x, 1) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -611,7 +610,7 @@ class TestPowTripleGradCheck2(unittest.TestCase): shape = [2, 3, 7, 9] eps = 1e-6 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.pow(x, 2) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -640,7 +639,7 @@ class TestPowTripleGradCheck3(unittest.TestCase): shape = [2, 3, 7, 9] eps = 1e-6 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.pow(x, 4) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -669,7 +668,7 @@ class TestCosTripleGradCheck(unittest.TestCase): shape = [2, 3, 7, 9] eps = 0.0005 dtype = np.float64 - x = layers.data('x', shape, False, dtype=dtype) + x = paddle.static.data('x', shape, dtype=dtype) x.persistable = True y = paddle.cos(x) x_arr = np.random.random(shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index 2f0c594746a508f1c2794c381c5d35658d315887..1f0a49cbb35ae875f14cbd93406dc6bd3fa887d2 100755 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -34,13 +34,13 @@ class TestSqrtOpError(unittest.TestCase): in1 = 1 self.assertRaises(TypeError, paddle.sqrt, in1) # The input dtype of sqrt op must be float16, float32, float64. 
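# A hedged, minimal sketch (not taken from this patch) of the data-declaration
# migration applied throughout these tests: fluid.layers.data prepended the batch
# dimension implicitly (append_batch_size defaulted to True), while paddle.static.data
# takes the full shape explicitly, so the batch dimension is written out as -1.
# Assumes only that PaddlePaddle is installed.
import paddle

paddle.enable_static()
with paddle.static.program_guard(paddle.static.Program()):
    # old form being removed:
    #   in2 = fluid.layers.data(name='input2', shape=[12, 10], dtype='int32')
    # new form being introduced (note the explicit -1 batch dimension):
    in2 = paddle.static.data(name='input2', shape=[-1, 12, 10], dtype='int32')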
- in2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32" + in2 = paddle.static.data( + name='input2', shape=[-1, 12, 10], dtype="int32" ) self.assertRaises(TypeError, paddle.sqrt, in2) - in3 = fluid.layers.data( - name='input3', shape=[12, 10], dtype="float16" + in3 = paddle.static.data( + name='input3', shape=[-1, 12, 10], dtype="float16" ) paddle.sqrt(x=in3) @@ -167,8 +167,8 @@ class TestExpm1API(unittest.TestCase): class TestParameter: def test_out_name(self): with fluid.program_guard(fluid.Program()): - np_x = np.array([0.1]) - data = fluid.layers.data(name="X", shape=[1]) + np_x = np.array([0.1]).astype('float32').reshape((-1, 1)) + data = paddle.static.data(name="X", shape=[-1, 1], dtype="float32") out = eval("paddle.%s(data, name='Y')" % self.op_type) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -520,8 +520,8 @@ class TestAtan(TestActivation, TestParameter): def test_out_name(self): with fluid.program_guard(fluid.Program()): - np_x = np.array([0.1]) - data = fluid.layers.data(name="X", shape=[1]) + np_x = np.array([0.1]).astype('float32').reshape((-1, 1)) + data = paddle.static.data(name="X", shape=[-1, 1], dtype="float32") out = paddle.atan(data, name='Y') place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -582,10 +582,9 @@ class TestSinhAPI(unittest.TestCase): input_x = np.random.uniform(0.1, 1, test_data_shape).astype( "float32" ) - data_x = fluid.layers.data( + data_x = paddle.static.data( name="data_x", shape=test_data_shape, - append_batch_size=False, dtype="float32", ) @@ -667,10 +666,9 @@ class TestCoshAPI(unittest.TestCase): input_x = np.random.uniform(0.1, 1, test_data_shape).astype( "float32" ) - data_x = fluid.layers.data( + data_x = paddle.static.data( name="data_x", shape=test_data_shape, - append_batch_size=False, dtype="float32", ) @@ -2399,12 +2397,8 @@ class TestLog(TestActivation): self.check_grad(['X'], 'Out', check_eager=True) def test_error(self): - in1 = fluid.layers.data( - name="in1", shape=[11, 17], append_batch_size=False, dtype="int32" - ) - in2 = fluid.layers.data( - name="in2", shape=[11, 17], append_batch_size=False, dtype="int64" - ) + in1 = paddle.static.data(name="in1", shape=[11, 17], dtype="int32") + in2 = paddle.static.data(name="in2", shape=[11, 17], dtype="int64") self.assertRaises(TypeError, paddle.log, in1) self.assertRaises(TypeError, paddle.log, in2) @@ -2569,10 +2563,9 @@ class TestLog1pAPI(unittest.TestCase): def test_api(self): with fluid.program_guard(fluid.Program(), fluid.Program()): input_x = np.random.uniform(0.1, 1, [11, 17]).astype("float64") - data_x = fluid.layers.data( + data_x = paddle.static.data( name="data_x", shape=[11, 17], - append_batch_size=False, dtype="float64", ) @@ -2718,12 +2711,8 @@ class TestPow_factor_tensor(TestActivation): def test_api(self): input = np.random.uniform(1, 2, [11, 17]).astype("float32") - x = fluid.layers.data( - name="x", shape=[11, 17], append_batch_size=False, dtype="float32" - ) - res = fluid.layers.data( - name="res", shape=[11, 17], append_batch_size=False, dtype="float32" - ) + x = paddle.static.data(name="x", shape=[11, 17], dtype="float32") + res = paddle.static.data(name="res", shape=[11, 17], dtype="float32") factor_1 = 2.0 factor_2 = fluid.layers.fill_constant([1], "float32", 3.0) diff --git a/python/paddle/fluid/tests/unittests/test_adadelta_op.py b/python/paddle/fluid/tests/unittests/test_adadelta_op.py index 95a485ce3a4e7d4acd0146eacfa33e10116de262..51435ccb95fc998dd4013b4a08d06ff3fc8ff3c6 100644 --- 
a/python/paddle/fluid/tests/unittests/test_adadelta_op.py +++ b/python/paddle/fluid/tests/unittests/test_adadelta_op.py @@ -143,8 +143,8 @@ class TestAdadeltaV2(unittest.TestCase): place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1) cost = paddle.nn.functional.square_error_cost( input=y_predict, label=y diff --git a/python/paddle/fluid/tests/unittests/test_addmm_op.py b/python/paddle/fluid/tests/unittests/test_addmm_op.py index 9a1385c63b5dbc39e51025a13feae6532cd3c758..2e4a9515b6aef1442b9391d9aad531c04e822206 100644 --- a/python/paddle/fluid/tests/unittests/test_addmm_op.py +++ b/python/paddle/fluid/tests/unittests/test_addmm_op.py @@ -76,109 +76,86 @@ class TestAddMMOpError(unittest.TestCase): self.assertRaises(TypeError, paddle.addmm, input, x1, x2) # The input dtype of mul_op must be float32 or float64. - input = fluid.layers.data( + input = paddle.static.data( name='input', shape=[4, 4], dtype="int32", - append_batch_size=False, - ) - x3 = fluid.layers.data( - name='x3', shape=[4, 4], dtype="int32", append_batch_size=False - ) - x4 = fluid.layers.data( - name='x4', shape=[4, 4], dtype="int32", append_batch_size=False ) + x3 = paddle.static.data(name='x3', shape=[4, 4], dtype="int32") + x4 = paddle.static.data(name='x4', shape=[4, 4], dtype="int32") self.assertRaises(TypeError, paddle.addmm, input, x3, x4) # x and y dimension mismatch - x5 = fluid.layers.data( + x5 = paddle.static.data( name='x5', shape=[4, 5], dtype="float32", - append_batch_size=False, ) - x6 = fluid.layers.data( + x6 = paddle.static.data( name='x6', shape=[4, 4], dtype="float32", - append_batch_size=False, ) self.assertRaises(ValueError, paddle.addmm, input, x5, x6) # input and x are not broadcastable - x7 = fluid.layers.data( + x7 = paddle.static.data( name='x7', shape=[4, 4], dtype="float32", - append_batch_size=False, ) - x8 = fluid.layers.data( + x8 = paddle.static.data( name='x8', shape=[4, 4], dtype="float32", - append_batch_size=False, ) - input1 = fluid.layers.data( + input1 = paddle.static.data( name='input1', shape=[2, 4], dtype="float32", - append_batch_size=False, ) self.assertRaises(ValueError, paddle.addmm, input1, x7, x8) # input and x are not broadcastable - x9 = fluid.layers.data( + x9 = paddle.static.data( name='x9', shape=[4, 4], dtype="float32", - append_batch_size=False, ) - x10 = fluid.layers.data( + x10 = paddle.static.data( name='x10', shape=[4, 4], dtype="float32", - append_batch_size=False, ) - input2 = fluid.layers.data( + input2 = paddle.static.data( name='input2', shape=[1, 2], dtype="float32", - append_batch_size=False, ) self.assertRaises(ValueError, paddle.addmm, input2, x9, x10) - x11 = fluid.layers.data( + x11 = paddle.static.data( name='x11', shape=[4, 4], dtype="float32", - append_batch_size=False, - ) - x12 = fluid.layers.data( - name='x12', - shape=[4, 4], - dtype="float32", - append_batch_size=False, ) - input3 = fluid.layers.data( + x12 = paddle.static.data(name='x12', shape=[4, 4], dtype="float32") + input3 = paddle.static.data( name='input3', shape=[4, 2], dtype="float32", - append_batch_size=False, ) self.assertRaises(ValueError, paddle.addmm, input3, x11, x12) - x13 = fluid.layers.data( + x13 = paddle.static.data( name='x13', shape=[4, 4], 
dtype="float32", - append_batch_size=False, ) - x14 = fluid.layers.data( + x14 = paddle.static.data( name='x14', shape=[4, 4], dtype="float32", - append_batch_size=False, ) - input4 = fluid.layers.data( + input4 = paddle.static.data( name='input4', shape=[3, 1], dtype="float32", - append_batch_size=False, ) self.assertRaises(ValueError, paddle.addmm, input4, x13, x14) diff --git a/python/paddle/fluid/tests/unittests/test_argsort_op.py b/python/paddle/fluid/tests/unittests/test_argsort_op.py index 17614692f69d8ac035dee1272ef928a0bd3d39ef..e3f90d7fd2d1001ecaf219201bcf462f7e0b06f7 100644 --- a/python/paddle/fluid/tests/unittests/test_argsort_op.py +++ b/python/paddle/fluid/tests/unittests/test_argsort_op.py @@ -84,13 +84,17 @@ class TestArgsortOpCPU(unittest.TestCase): ) with fluid.program_guard(self.main_program, self.startup_program): - x = fluid.layers.data( - name="x", shape=self.input_shape, dtype=self.dtype + x = paddle.static.data( + name="x", shape=[-1] + list(self.input_shape), dtype=self.dtype ) x.stop_gradient = False - label = fluid.layers.data( - name="label", shape=self.input_shape, dtype=self.dtype + x.desc.set_need_check_feed(False) + label = paddle.static.data( + name="label", + shape=[-1] + list(self.input_shape), + dtype=self.dtype, ) + label.desc.set_need_check_feed(False) self.index = paddle.argsort( x=x, axis=self.axis, descending=self.descending ) diff --git a/python/paddle/fluid/tests/unittests/test_array_read_write_op.py b/python/paddle/fluid/tests/unittests/test_array_read_write_op.py index 3e4543cd1ce8ddf1d3b83a80663c0d784f02bb1b..497dc31477aeca69ba64f5b465e234b0d7b03694 100644 --- a/python/paddle/fluid/tests/unittests/test_array_read_write_op.py +++ b/python/paddle/fluid/tests/unittests/test_array_read_write_op.py @@ -62,9 +62,9 @@ class TestArrayReadWrite(unittest.TestCase): def test_read_write(self): paddle.enable_static() x = [ - layers.data(name='x0', shape=[100]), - layers.data(name='x1', shape=[100]), - layers.data(name='x2', shape=[100]), + paddle.static.data(name='x0', shape=[-1, 100]), + paddle.static.data(name='x1', shape=[-1, 100]), + paddle.static.data(name='x2', shape=[-1, 100]), ] for each_x in x: each_x.stop_gradient = False diff --git a/python/paddle/fluid/tests/unittests/test_assign_op.py b/python/paddle/fluid/tests/unittests/test_assign_op.py index 02f649b39bfab231fe0691c39b217e017ebf3a9a..8017840c50c115304776c9d1b4973ab97bca493f 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_op.py @@ -22,7 +22,6 @@ from decorator_helper import prog_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard from paddle.fluid.backward import append_backward @@ -261,7 +260,7 @@ class TestAssignDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [3, 4, 5], False, dtype) + data = paddle.static.data('data', [3, 4, 5], dtype) data.persistable = True out = paddle.fluid.layers.assign(data) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -292,7 +291,7 @@ class TestAssignTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [3, 4, 5], False, dtype) + data = paddle.static.data('data', [3, 4, 5], dtype) data.persistable = True out = paddle.fluid.layers.assign(data) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git 
a/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py index fb6dc442a73c92f12051e20613aa44eeb77e22a3..3c6594e659e894e3d1f96c7a142e84463d9877a8 100644 --- a/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py @@ -26,8 +26,10 @@ BATCH_SIZE = 64 def convolutional_neural_network(use_py_reader): with fluid.unique_name.guard(): - img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data( + name='img', shape=[-1, 1, 28, 28], dtype='float32' + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') py_reader = None if use_py_reader: diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py index 946b8959d0b4d397a451cb8a14bb7a8fa0ca11fb..25e4ab9aa8b4a48c9f4ab6349e5ba4bbb00cd67a 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py @@ -38,7 +38,7 @@ from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.reshard import Resharder from paddle.distributed.fleet import auto -from paddle.fluid import core, layers +from paddle.fluid import core from paddle.fluid.initializer import NumpyArrayInitializer if os.getenv("CUDA_VISIBLE_DEVICES") is not None: @@ -588,7 +588,9 @@ class TestAutoParallelMapper(unittest.TestCase): root_id = 0 nranks = 2 with fluid.program_guard(train_program, startup_program): - input = layers.data(name="input", shape=[10, 10], dtype='float32') + input = paddle.static.data( + name="input", shape=[-1, 10, 10], dtype='float32' + ) output = train_program.current_block().create_var( name="outofbroadcast", dtype='float32', diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index 6802a8a9ea995726eed7985b7bea78e2e1eb910a..c2a6c468e5c8f184e6e74a98151d6c008ba997d4 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -763,7 +763,9 @@ class TestBatchNormOpError(unittest.TestCase): # the input dtype of batch_norm must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") + x2 = paddle.static.data( + name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" + ) self.assertRaises(TypeError, paddle.static.nn.batch_norm, x2) @@ -779,7 +781,9 @@ class TestDygraphBatchNormAPIError(unittest.TestCase): # the input dtype of BatchNorm must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") + x2 = paddle.static.data( + name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" + ) self.assertRaises(TypeError, batch_norm, x2) diff --git a/python/paddle/fluid/tests/unittests/test_bmm_op.py b/python/paddle/fluid/tests/unittests/test_bmm_op.py index 5269f27ccdaa3990cd5a425286bf56947c0eb4cc..aaa0e1f97fde11959ce938dca1ceb88cc502a89c 100644 --- a/python/paddle/fluid/tests/unittests/test_bmm_op.py +++ b/python/paddle/fluid/tests/unittests/test_bmm_op.py @@ -41,10 
+41,10 @@ class TestBmmOp(OpTest): class API_TestBmm(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.layers.data( + data1 = paddle.static.data( 'data1', shape=[-1, 3, 4], dtype='float64' ) - data2 = fluid.layers.data( + data2 = paddle.static.data( 'data2', shape=[-1, 4, 5], dtype='float64' ) result_bmm = paddle.bmm(data1, data2) diff --git a/python/paddle/fluid/tests/unittests/test_boxps.py b/python/paddle/fluid/tests/unittests/test_boxps.py index 4f2d3ee138a77573fad6a64ae53c99f0bc18935a..b6a1a845aa326e3b455a76596a3898e5fc5feef3 100644 --- a/python/paddle/fluid/tests/unittests/test_boxps.py +++ b/python/paddle/fluid/tests/unittests/test_boxps.py @@ -96,11 +96,11 @@ class TestPullBoxSparseOP(unittest.TestCase): paddle.enable_static() program = fluid.Program() with fluid.program_guard(program): - x = fluid.layers.data( - name='x', shape=[1], dtype='int64', lod_level=0 + x = paddle.static.data( + name='x', shape=[-1, 1], dtype='int64', lod_level=0 ) - y = fluid.layers.data( - name='y', shape=[1], dtype='int64', lod_level=0 + y = paddle.static.data( + name='y', shape=[-1, 1], dtype='int64', lod_level=0 ) emb_x, emb_y = _pull_box_sparse([x, y], size=1) diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py b/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py index 8a1b6a52a2baa92412b4830df600aaef4a795f4a..6eec711c49e0abb854cd5da84216e7a966221ec6 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py @@ -164,11 +164,11 @@ class TestBroadcastTensorsAPI(unittest.TestCase): def test_api(self): def test_static(): inputs = [ - paddle.fluid.layers.data( - shape=[4, 1, 4, 1], dtype='float32', name="x0" + paddle.static.data( + shape=[-1, 4, 1, 4, 1], dtype='float32', name="x0" ), - paddle.fluid.layers.data( - shape=[1, 4, 1, 4], dtype='float32', name="x1" + paddle.static.data( + shape=[-1, 1, 4, 1, 4], dtype='float32', name="x1" ), ] paddle.broadcast_tensors(inputs) @@ -196,33 +196,33 @@ class TestRaiseBroadcastTensorsError(unittest.TestCase): def test_errors(self): def test_type(): inputs = [ - paddle.fluid.layers.data( - shape=[1, 1, 1, 1], dtype='float32', name="x4" + paddle.static.data( + shape=[-1, 1, 1, 1, 1], dtype='float32', name="x4" ), - paddle.fluid.layers.data( - shape=[1, 4, 1, 1], dtype='float64', name="x5" + paddle.static.data( + shape=[-1, 1, 4, 1, 1], dtype='float64', name="x5" ), ] paddle.broadcast_tensors(inputs) def test_dtype(): inputs = [ - paddle.fluid.layers.data( - shape=[1, 1, 1, 1], dtype='int8', name="x6" + paddle.static.data( + shape=[-1, 1, 1, 1, 1], dtype='int8', name="x6" ), - paddle.fluid.layers.data( - shape=[1, 4, 1, 1], dtype='int8', name="x7" + paddle.static.data( + shape=[-1, 1, 4, 1, 1], dtype='int8', name="x7" ), ] paddle.broadcast_tensors(inputs) def test_bcast_semantics(): inputs = [ - paddle.fluid.layers.data( - shape=[1, 3, 1, 1], dtype='float32', name="x9" + paddle.static.data( + shape=[-1, 1, 3, 1, 1], dtype='float32', name="x9" ), - paddle.fluid.layers.data( - shape=[1, 8, 1, 1], dtype='float32', name="x10" + paddle.static.data( + shape=[-1, 1, 8, 1, 1], dtype='float32', name="x10" ), ] paddle.broadcast_tensors(inputs) diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py b/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py index ea8e7648b1d32d3a35ee91fc4e7a4fb167da36a7..897c5b54b169b7e6054cf6692b951f4d8d234ecc 100644 --- 
a/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast_to_op.py @@ -31,9 +31,9 @@ class TestBroadcastToError(unittest.TestCase): ) shape = [2, 2] self.assertRaises(TypeError, paddle.tensor.broadcast_to, x1, shape) - x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") + x2 = paddle.static.data(name='x2', shape=[-1, 4], dtype="uint8") self.assertRaises(TypeError, paddle.tensor.broadcast_to, x2, shape) - x3 = fluid.layers.data(name='x3', shape=[4], dtype="bool") + x3 = paddle.static.data(name='x3', shape=[-1, 4], dtype="bool") x3.stop_gradient = False self.assertRaises(ValueError, paddle.tensor.broadcast_to, x3, shape) @@ -42,15 +42,12 @@ class TestBroadcastToError(unittest.TestCase): class TestBroadcastToAPI(unittest.TestCase): def test_api(self): input = np.random.random([12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32" - ) + x = paddle.static.data(name='x', shape=[12, 14], dtype="float32") positive_2 = fluid.layers.fill_constant([1], "int32", 12) - expand_shape = fluid.layers.data( + expand_shape = paddle.static.data( name="expand_shape", shape=[2], - append_batch_size=False, dtype="int32", ) diff --git a/python/paddle/fluid/tests/unittests/test_cast_op.py b/python/paddle/fluid/tests/unittests/test_cast_op.py index ea7f24a4883867704c8a793d154d1b354a4e3dee..0e451c3e9fe776c45629b61499c4fc5e5880c91d 100644 --- a/python/paddle/fluid/tests/unittests/test_cast_op.py +++ b/python/paddle/fluid/tests/unittests/test_cast_op.py @@ -22,7 +22,6 @@ from op_test import OpTest, convert_float_to_uint16, convert_uint16_to_float import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard @@ -142,7 +141,7 @@ class TestCastDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 3, 4], False, dtype) + data = paddle.static.data('data', [2, 3, 4], dtype) data.persistable = True out = paddle.cast(data, 'float64') data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -173,7 +172,7 @@ class TestCastTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 3, 4], False, dtype) + data = paddle.static.data('data', [2, 3, 4], dtype) data.persistable = True out = paddle.cast(data, 'float64') data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_async.py b/python/paddle/fluid/tests/unittests/test_communicator_async.py index ebc65cd5ac240b5ea80abdefa6b188b7d3a0840e..322ec2f9205a01cc4bb129cb35a551de02986cfc 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_async.py @@ -27,8 +27,8 @@ import paddle.fluid as fluid class TestCommunicator(unittest.TestCase): def net(self): - x = fluid.layers.data(name='x', shape=[1], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index 
9019e9e9e3f5ecb110c9cd5200ae8e6906306990..0e836dca1c2e515d95b9ff6c94f4383203779a5f 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -31,8 +31,10 @@ paddle.enable_static() class TestCommunicatorGeoEnd2End(unittest.TestCase): def net(self): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - x1 = fluid.layers.data(name='x1', shape=[1], dtype='int64', lod_level=1) + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + x1 = paddle.static.data( + name='x1', shape=[-1, 1], dtype='int64', lod_level=1 + ) emb = fluid.layers.embedding( input=x1, @@ -47,8 +49,7 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase): pool = fluid.layers.sequence_pool(input=emb, pool_type="sum") z = fluid.layers.concat(input=[x, pool], axis=1) y_predict = paddle.static.nn.fc(x=z, size=1) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') - + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) return avg_cost, x, x1, y diff --git a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py index f0c1e9c8850d8547094562ad6e1ace089e0a121e..b767d98281ed182ecc9679bd3db3a063885f1335 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py @@ -51,8 +51,8 @@ class TestCommunicator(unittest.TestCase): role = role_maker.PaddleCloudRoleMaker() fleet.init(role) - x = fluid.layers.data(name='x', shape=[1], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') slots_vars = [x, y] cost = paddle.nn.functional.square_error_cost(input=x, label=y) diff --git a/python/paddle/fluid/tests/unittests/test_compare_op.py b/python/paddle/fluid/tests/unittests/test_compare_op.py index fa98771ce1ab5eb0522720adc2dcfd7d9915bdd1..55cc9d933fc02bdd079f51b13a1f1a8492fbd8f9 100755 --- a/python/paddle/fluid/tests/unittests/test_compare_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_op.py @@ -41,9 +41,9 @@ def create_test_class(op_type, typename, callback): def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[2], dtype='int32') - y = fluid.layers.data(name='y', shape=[2], dtype='int32') - a = fluid.layers.data(name='a', shape=[2], dtype='int16') + x = paddle.static.data(name='x', shape=[-1, 2], dtype='int32') + y = paddle.static.data(name='y', shape=[-1, 2], dtype='int32') + a = paddle.static.data(name='a', shape=[-1, 2], dtype='int16') op = eval("paddle.%s" % self.op_type) self.assertRaises(TypeError, op, x=x, y=a) self.assertRaises(TypeError, op, x=a, y=y) @@ -415,8 +415,8 @@ def create_paddle_case(op_type, callback): def test_attr_name(self): paddle.enable_static() with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[4], dtype='int32') - y = fluid.layers.data(name='y', shape=[4], dtype='int32') + x = paddle.static.data(name='x', shape=[-1, 4], dtype='int32') + y = paddle.static.data(name='y', shape=[-1, 4], dtype='int32') op = eval("paddle.%s" % (self.op_type)) out = op(x=x, y=y, name="name_%s" % (self.op_type)) self.assertEqual("name_%s" % (self.op_type) in out.name, True) @@ 
-439,7 +439,7 @@ class TestCompareOpError(unittest.TestCase): paddle.enable_static() with program_guard(Program(), Program()): # The input x and y of compare_op must be Variable. - x = fluid.layers.data(name='x', shape=[1], dtype="float32") + x = paddle.static.data(name='x', shape=[-1, 1], dtype="float32") y = fluid.create_lod_tensor( numpy.array([[-1]]), [[1]], fluid.CPUPlace() ) diff --git a/python/paddle/fluid/tests/unittests/test_compiled_program.py b/python/paddle/fluid/tests/unittests/test_compiled_program.py index 28c3feb010e817924d945760c975bb241e88c69d..a2ea57f83576ab338cf42d274101e742fa91b60c 100644 --- a/python/paddle/fluid/tests/unittests/test_compiled_program.py +++ b/python/paddle/fluid/tests/unittests/test_compiled_program.py @@ -102,10 +102,10 @@ class TestCompiledProgramError(unittest.TestCase): self.assertRaises(TypeError, fluid.CompiledProgram, "program") def build_simple_model(self): - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32' + img = paddle.static.data( + name='image', shape=[-1, 1, 28, 28], dtype='float32' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') prediction = paddle.static.nn.fc(x=img, size=10, activation='softmax') loss = paddle.nn.functional.cross_entropy( input=prediction, label=label, reduction='none', use_softmax=False diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py index d20b3e92c45f18f33ead2356ff13390bc6501c4c..7d7c2ddf70d49dcd385d9a760f8d4b10e9535a63 100644 --- a/python/paddle/fluid/tests/unittests/test_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_concat_op.py @@ -20,7 +20,6 @@ from decorator_helper import prog_scope import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import Program, core, program_guard from paddle.fluid.tests.unittests.op_test import ( OpTest, @@ -250,7 +249,7 @@ class TestConcatOpError(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): # The input type of concat_op should be list. - x1 = fluid.layers.data(shape=[4], dtype='int32', name='x1') + x1 = paddle.static.data(shape=[-1, 4], dtype='int32', name='x1') fluid.layers.concat(x1) # The item in input must be Variable. x2 = fluid.create_lod_tensor( @@ -261,12 +260,12 @@ class TestConcatOpError(unittest.TestCase): ) self.assertRaises(TypeError, fluid.layers.concat, [x2]) # The input dtype of concat_op must be float16, float32, float64, int32, int64. - x4 = fluid.layers.data(shape=[4], dtype='uint8', name='x4') - x5 = fluid.layers.data(shape=[4], dtype='uint8', name='x5') + x4 = paddle.static.data(shape=[-1, 4], dtype='uint8', name='x4') + x5 = paddle.static.data(shape=[-1, 4], dtype='uint8', name='x5') self.assertRaises(TypeError, fluid.layers.concat, [x4, x5]) - x6 = fluid.layers.data(shape=[4], dtype='float16', name='x6') - x7 = fluid.layers.data(shape=[4], dtype='float16', name='x7') - x8 = fluid.layers.data(shape=[4], dtype='float32', name='x8') + x6 = paddle.static.data(shape=[-1, 4], dtype='float16', name='x6') + x7 = paddle.static.data(shape=[-1, 4], dtype='float16', name='x7') + x8 = paddle.static.data(shape=[-1, 4], dtype='float32', name='x8') fluid.layers.concat([x6, x7]) # The type of axis in concat_op should be int or Variable. 
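# A minimal illustrative sketch (an assumption for clarity, not part of this patch)
# of the axis-type rule that the adjacent concat error test exercises: paddle.concat
# accepts a Python int (or an integer Tensor) for axis, while a float such as 3.2
# is rejected, which is what the test below asserts via assertRaises(TypeError, ...).
import paddle

x = paddle.ones([2, 3], dtype='float32')
y = paddle.zeros([2, 3], dtype='float32')
out = paddle.concat([x, y], axis=0)   # valid: axis given as an int
# paddle.concat([x, y], 3.2)          # invalid axis type -> raises TypeError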
@@ -369,9 +368,9 @@ class TestConcatAPI(unittest.TestCase): self.assertRaises(TypeError, fluid.layers.concat, [x4, x5]) # The type of axis in concat_op should be int or Variable. - x6 = fluid.layers.data(shape=[4], dtype='float16', name='x6') - x7 = fluid.layers.data(shape=[4], dtype='float16', name='x7') - x8 = fluid.layers.data(shape=[4], dtype='float32', name='x8') + x6 = paddle.static.data(shape=[-1, 4], dtype='float16', name='x6') + x7 = paddle.static.data(shape=[-1, 4], dtype='float16', name='x7') + x8 = paddle.static.data(shape=[-1, 4], dtype='float32', name='x8') def test_axis_type(): paddle.concat([x6, x7], 3.2) @@ -457,9 +456,9 @@ class TestConcatDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data1 = layers.data('data1', [2, 3], False, dtype) + data1 = paddle.static.data('data1', [2, 3], dtype) data1.persistable = True - data2 = layers.data('data2', [2, 3], False, dtype) + data2 = paddle.static.data('data2', [2, 3], dtype) data2.persistable = True out = paddle.concat([data1, data2]) data1_arr = np.random.uniform(-1, 1, data1.shape).astype(dtype) @@ -498,9 +497,9 @@ class TestConcatTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data1 = layers.data('data1', [2, 3, 4], False, dtype) + data1 = paddle.static.data('data1', [2, 3, 4], dtype) data1.persistable = True - data2 = layers.data('data2', [2, 3, 4], False, dtype) + data2 = paddle.static.data('data2', [2, 3, 4], dtype) data2.persistable = True out = paddle.concat([data1, data2], 1) data1_arr = np.random.uniform(-1, 1, data1.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_conditional_block.py b/python/paddle/fluid/tests/unittests/test_conditional_block.py index 0aee7cadd7e52354b0c43fd3654a029b24ce6eb2..0f8f6b32c9a8c34ee183a86a696a32846e3d8eaa 100644 --- a/python/paddle/fluid/tests/unittests/test_conditional_block.py +++ b/python/paddle/fluid/tests/unittests/test_conditional_block.py @@ -30,7 +30,7 @@ class ConditionalBlockTest(unittest.TestCase): main_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): - data = layers.data(name='X', shape=[1], dtype='float32') + data = paddle.static.data(name='X', shape=[-1, 1], dtype='float32') data.stop_gradient = False cond = ConditionalBlock(inputs=[data]) out = paddle.tensor.create_tensor(dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_api.py b/python/paddle/fluid/tests/unittests/test_conv2d_api.py index 16bf938d1ca8abba4048a3b470f2288265bcea89..0c458c1fec484958842546617db4a4136a416234 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_api.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_api.py @@ -26,17 +26,15 @@ import paddle.fluid.core as core class TestConv2DAPI(unittest.TestCase): def test_api(self): - input_NHWC = fluid.layers.data( + input_NHWC = paddle.static.data( name="input_NHWC", shape=[2, 5, 5, 3], - append_batch_size=False, dtype="float32", ) - input_NCHW = fluid.layers.data( + input_NCHW = paddle.static.data( name="input_NCHW", shape=[2, 3, 5, 5], - append_batch_size=False, dtype="float32", ) @@ -120,10 +118,9 @@ class TestConv2DAPI(unittest.TestCase): class TestConv2DAPI_Error(unittest.TestCase): def test_api(self): - input = fluid.layers.data( + input = paddle.static.data( name="input", shape=[2, 5, 5, 5], - append_batch_size=False, dtype="float32", ) @@ -206,10 +203,9 @@ class TestConv2DAPI_Error(unittest.TestCase): self.assertRaises(ValueError, run_5) # ValueError: channel dimmention - x = 
fluid.layers.data( + x = paddle.static.data( name="x", shape=[2, 5, 5, -1], - append_batch_size=False, dtype="float32", ) @@ -293,10 +289,9 @@ class TestConv2DAPI_Error(unittest.TestCase): self.assertRaises(ValueError, run_10) def test_api_with_error_input(self): - input = fluid.layers.data( + input = paddle.static.data( name="error_input", shape=[1], - append_batch_size=False, dtype="float32", ) @@ -325,9 +320,8 @@ class TestConv2DAPI_Error(unittest.TestCase): class TestConv2DEnviron(unittest.TestCase): def run1(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - inputs = fluid.layers.data( + inputs = paddle.static.data( shape=[2, 3, 5, 5], - append_batch_size=False, name="inputs", dtype="float32", ) diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_op.py index ca7017a4adeaf13f5a2db028a931b23dcf0bbba6..60c729b4f96d3ae78abedf5032cb09a35aab1537 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_op.py @@ -713,8 +713,8 @@ class TestConv2DOpError(unittest.TestCase): def test_dtype(): # the input dtype of conv2d must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="int32" + x2 = paddle.static.data( + name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" ) paddle.static.nn.conv2d(x2, 1, 1) diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py index 2d4694be2e9e343aefa784f1241942a2b316a092..afbce517f624373813e1896dcd2edc3594ca0f48 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py @@ -831,11 +831,11 @@ class TestCUDNNWithEvenUpsample_NHWC_FP16(TestCUDNN_FP16): class TestConv2DTransposeAPI(unittest.TestCase): def test_case1(self): - data1 = fluid.layers.data( - name='data1', shape=[3, 5, 5], dtype='float32' + data1 = paddle.static.data( + name='data1', shape=[-1, 3, 5, 5], dtype='float32' ) - data2 = fluid.layers.data( - name='data2', shape=[5, 5, 3], dtype='float32' + data2 = paddle.static.data( + name='data2', shape=[-1, 5, 5, 3], dtype='float32' ) out1 = paddle.static.nn.conv2d_transpose( input=data1, @@ -918,7 +918,9 @@ class TestConv2DTransposeAPI(unittest.TestCase): class TestConv2DTransposeOpException(unittest.TestCase): def test_exception(self): - data = fluid.layers.data(name='data', shape=[3, 5, 5], dtype="float32") + data = paddle.static.data( + name='data', shape=[-1, 3, 5, 5], dtype="float32" + ) def attr_data_format(): out = paddle.static.nn.conv2d_transpose( @@ -965,8 +967,8 @@ class TestConv2DTransposeOpException(unittest.TestCase): self.assertRaises(ValueError, attr_padding_with_data_format) - error_input = fluid.layers.data( - name='error_data', shape=[1], dtype="float32" + error_input = paddle.static.data( + name='error_data', shape=[-1, 1], dtype="float32" ) def error_input_size(): diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_op.py index dc2760307cfbd06f1f20c151649d5ed75ba05e33..4930fd25678c99a257c9aade0625cec35bfa1c2e 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_op.py @@ -18,7 +18,6 @@ import numpy as np from op_test import OpTest import paddle -import paddle.fluid as fluid import paddle.fluid.core as core @@ 
-865,17 +864,15 @@ create_test_cudnn_channel_last_class(TestWith1x1_AsyPadding) class TestConv3DAPI(unittest.TestCase): def test_api(self): - input_NDHWC = fluid.layers.data( + input_NDHWC = paddle.static.data( name="input_NDHWC", shape=[2, 5, 5, 5, 3], - append_batch_size=False, dtype="float32", ) - input_NCDHW = fluid.layers.data( + input_NCDHW = paddle.static.data( name="input_NCDHW", shape=[2, 3, 5, 5, 3], - append_batch_size=False, dtype="float32", ) @@ -948,10 +945,9 @@ class TestConv3DAPI(unittest.TestCase): class TestConv3DAPI_Error(unittest.TestCase): def test_api(self): - input = fluid.layers.data( + input = paddle.static.data( name="input", shape=[2, 5, 5, 5, 4], - append_batch_size=False, dtype="float32", ) @@ -1034,10 +1030,9 @@ class TestConv3DAPI_Error(unittest.TestCase): self.assertRaises(ValueError, run_5) # ValueError: channel dimmention - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[2, 5, 5, 5, -1], - append_batch_size=False, dtype="float32", ) diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py index 0ad217f6810243a0f134529d5b8ca0bf55aabcfc..d47f341652075478902c85294eb834c1dbf26663 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_part2_op.py @@ -86,11 +86,11 @@ class TestWithDilation_NHWC(TestConv3DTransposeOp): class TestConv3DTransposeAPI(unittest.TestCase): def test_case1(self): - data1 = fluid.layers.data( - name='data1', shape=[3, 5, 5, 5], dtype='float32' + data1 = paddle.static.data( + name='data1', shape=[-1, 3, 5, 5, 5], dtype='float32' ) - data2 = fluid.layers.data( - name='data2', shape=[5, 5, 5, 3], dtype='float32' + data2 = paddle.static.data( + name='data2', shape=[-1, 5, 5, 5, 3], dtype='float32' ) out1 = paddle.static.nn.conv3d_transpose( @@ -174,8 +174,8 @@ class TestConv3DTransposeAPI(unittest.TestCase): class TestConv3DTransposeOpException(unittest.TestCase): def test_exception(self): - data = fluid.layers.data( - name='data', shape=[3, 5, 5, 5], dtype="float32" + data = paddle.static.data( + name='data', shape=[-1, 3, 5, 5, 5], dtype="float32" ) def attr_data_format(): diff --git a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py index 2bb99a00be73a03a58a7c58448bfe96c006290a2..3dc153e58aaf2f9c7a020ac42fbc8dfbd1e332a2 100644 --- a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py @@ -21,7 +21,6 @@ from decorator_helper import prog_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers class TestConvDoubleGradCheck(unittest.TestCase): @@ -30,7 +29,7 @@ class TestConvDoubleGradCheck(unittest.TestCase): shape = [2, 4, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d(x, 2, 1, groups=1, bias_attr=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -57,7 +56,7 @@ class TestConvDoubleGradCheckTest0(unittest.TestCase): shape = [2, 4, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d(x, 2, 1, bias_attr=False) x_arr = 
np.random.uniform(-1, 1, shape).astype(dtype) @@ -83,7 +82,7 @@ class TestConvDoubleGradCheckTest1(unittest.TestCase): shape = [2, 3, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d(x, 2, 1, padding=1, bias_attr=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -109,7 +108,7 @@ class TestConv3DDoubleGradCheck(unittest.TestCase): shape = [2, 4, 3, 4, 2] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv3d(x, 2, 1, bias_attr=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -136,7 +135,7 @@ class TestConv3DDoubleGradCheckTest1(unittest.TestCase): shape = [2, 4, 5, 3, 2] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv3d(x, 2, 1, padding=1, bias_attr=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -162,7 +161,7 @@ class TestConv2DoubleGradCheck_AsyPadding(unittest.TestCase): shape = [2, 2, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d( input=x, num_filters=2, @@ -195,7 +194,7 @@ class TestConv2DoubleGradCheck_PaddingSAME(unittest.TestCase): shape = [2, 2, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d( input=x, num_filters=2, @@ -228,7 +227,7 @@ class TestConv2DoubleGradCheck_PaddingVALID(unittest.TestCase): shape = [2, 2, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d( input=x, num_filters=2, @@ -261,7 +260,7 @@ class TestConv2DoubleGradCheck_ChannelLast(unittest.TestCase): shape = [2, 2, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d( input=x, num_filters=2, @@ -296,7 +295,7 @@ class TestConv2DoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase): shape = [2, 2, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d( input=x, num_filters=2, @@ -331,7 +330,7 @@ class TestConv3DDoubleGradCheck_AsyPadding(unittest.TestCase): shape = [2, 2, 2, 2, 2] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv3d( input=x, num_filters=2, @@ -364,7 +363,7 @@ class TestConv3DoubleGradCheck_PaddingSAME(unittest.TestCase): shape = [2, 2, 2, 2, 2] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv3d( input=x, num_filters=2, @@ -398,7 +397,7 @@ class 
TestConv3DoubleGradCheck_PaddingVALID(unittest.TestCase): shape = [2, 2, 3, 3, 2] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv3d( input=x, num_filters=2, @@ -431,7 +430,7 @@ class TestConv3DDoubleGradCheck_ChannelLast(unittest.TestCase): shape = [2, 2, 2, 2, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv3d( input=x, num_filters=2, @@ -466,7 +465,7 @@ class TestConv3DDoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase): shape = [2, 2, 2, 2, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv3d( input=x, num_filters=2, @@ -501,7 +500,7 @@ class TestDepthWiseConvDoubleGradCheck(unittest.TestCase): shape = [2, 4, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) # condition of depthwise conv: # use_cudnn == False @@ -538,8 +537,8 @@ class TestDepthWiseConvDoubleGradCheckCase1(unittest.TestCase): w_shape = [4, 1, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', x_shape, False, dtype) - w = layers.data('w', w_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) + w = paddle.static.data('w', w_shape, dtype) # condition of depthwise conv: # use_cudnn == False @@ -579,8 +578,8 @@ class TestConv3DDoubleGradCheck_NN(unittest.TestCase): w_shape = [6, 3, 3, 3, 3] eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 - x = layers.data('x', x_shape, False, dtype) - w = layers.data('w', w_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) + w = paddle.static.data('w', w_shape, dtype) x.persistable = True w.persistable = True y = paddle.nn.functional.conv3d(x, w) diff --git a/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py b/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py index 142359286bbea41b7fc2ce4bb5c0d8bd0ea4ffdb..34838dcd32c9d1722d1317352bb70495eaf5271d 100644 --- a/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_conv_transpose_nn_grad.py @@ -21,7 +21,6 @@ from decorator_helper import prog_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers class TestConvTransposeDoubleGradCheck(unittest.TestCase): @@ -35,7 +34,7 @@ class TestConvTransposeDoubleGradCheck(unittest.TestCase): dtype = np.float64 if core.is_compiled_with_rocm(): dtype = np.float32 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d_transpose( x, 2, filter_size=1, groups=1, bias_attr=False ) @@ -91,7 +90,7 @@ class TestConvTranspose2DoubleGradCheck_AsyPadding( dtype = np.float64 if core.is_compiled_with_rocm(): dtype = np.float32 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d_transpose( input=x, num_filters=2, @@ -144,7 +143,7 @@ class TestConvTranspose2DoubleGradCheck_PaddingSAME( dtype = np.float64 if core.is_compiled_with_rocm(): 
dtype = np.float32 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d_transpose( input=x, num_filters=2, @@ -197,7 +196,7 @@ class TestConvTranspose2DoubleGradCheck_PaddingVALID( dtype = np.float64 if core.is_compiled_with_rocm(): dtype = np.float32 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d_transpose( input=x, num_filters=2, @@ -250,7 +249,7 @@ class TestConvTranspose2DoubleGradCheck_ChannelLast( dtype = np.float64 if core.is_compiled_with_rocm(): dtype = np.float32 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) y = paddle.static.nn.conv2d_transpose( input=x, num_filters=2, diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py index a1f650dc63172de2dbad98d3362426a1b27f3366..ec5e5dbaa8a7f2f9043565f937706b70f3c1287f 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py @@ -429,11 +429,11 @@ class TestCrossEntropyOpError(unittest.TestCase): def test_dtype(): # the input dtype of cross_entropy must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="int32" + x2 = paddle.static.data( + name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" ) - lab2 = fluid.layers.data( - name='lab2', shape=[3, 4, 5, 6], dtype="int32" + lab2 = paddle.static.data( + name='lab2', shape=[-1, 3, 4, 5, 6], dtype="int32" ) paddle.nn.functional.cross_entropy( x2, lab2, reduction='none', use_softmax=False diff --git a/python/paddle/fluid/tests/unittests/test_cross_op.py b/python/paddle/fluid/tests/unittests/test_cross_op.py index 6cc366b85c89fa5c057e37029c5ce6bf68791161..29bdf93cf1c7ba6ce671c910796e7c0965e36e6c 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_op.py +++ b/python/paddle/fluid/tests/unittests/test_cross_op.py @@ -69,18 +69,18 @@ class TestCrossAPI(unittest.TestCase): def input_data(self): self.data_x = np.array( [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]] - ) + ).astype('float32') self.data_y = np.array( [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]] - ) + ).astype('float32') def test_cross_api(self): self.input_data() # case 1: with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 3]) - y = fluid.layers.data(name='y', shape=[-1, 3]) + x = paddle.static.data(name='x', shape=[-1, 3], dtype="float32") + y = paddle.static.data(name='y', shape=[-1, 3], dtype="float32") z = paddle.cross(x, y, axis=1) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( @@ -95,8 +95,8 @@ class TestCrossAPI(unittest.TestCase): # case 2: with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 3]) - y = fluid.layers.data(name='y', shape=[-1, 3]) + x = paddle.static.data(name='x', shape=[-1, 3], dtype="float32") + y = paddle.static.data(name='y', shape=[-1, 3], dtype="float32") z = paddle.cross(x, y) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( diff --git a/python/paddle/fluid/tests/unittests/test_data.py b/python/paddle/fluid/tests/unittests/test_data.py index 25b2372e817c4ba8f49ce1a98a84e4ca8b4d6818..0e2223767dd924cfa9ec065ec67984c29ce399af 100644 --- a/python/paddle/fluid/tests/unittests/test_data.py +++ b/python/paddle/fluid/tests/unittests/test_data.py @@ -17,7 +17,6 @@ import unittest import paddle 
import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard @@ -37,21 +36,6 @@ class TestApiDataError(unittest.TestCase): self.assertRaises(TypeError, test_shape_type) - def test_layers_data(self): - with program_guard(Program(), Program()): - - # 1. The type of 'name' in layers.data must be str. - def test_name_type(): - layers.data(name=1, shape=[2, 25], dtype="bool") - - self.assertRaises(TypeError, test_name_type) - - # 2. The type of 'shape' in layers.data must be list or tuple. - def test_shape_type(): - layers.data(name='data1', shape=2, dtype="bool") - - self.assertRaises(TypeError, test_shape_type) - class TestApiStaticDataError(unittest.TestCase): def test_fluid_dtype(self): @@ -81,16 +65,15 @@ class TestApiStaticDataError(unittest.TestCase): self.assertRaises(TypeError, test_shape_type) - def test_layers_data(self): with program_guard(Program(), Program()): - # 1. The type of 'name' in layers.data must be str. + # 1. The type of 'name' in paddle.static.data must be str. def test_name_type(): paddle.static.data(name=1, shape=[2, 25], dtype="bool") self.assertRaises(TypeError, test_name_type) - # 2. The type of 'shape' in layers.data must be list or tuple. + # 2. The type of 'shape' in paddle.static.data must be list or tuple. def test_shape_type(): paddle.static.data(name='data1', shape=2, dtype="bool") @@ -102,9 +85,6 @@ class TestApiErrorWithDynamicMode(unittest.TestCase): with program_guard(Program(), Program()): paddle.disable_static() self.assertRaises(AssertionError, fluid.data, 'a', [2, 25]) - self.assertRaises( - AssertionError, fluid.layers.data, 'b', shape=[2, 25] - ) self.assertRaises( AssertionError, paddle.static.data, 'c', shape=[2, 25] ) diff --git a/python/paddle/fluid/tests/unittests/test_data_norm_op.py b/python/paddle/fluid/tests/unittests/test_data_norm_op.py index 2b84f2b5685af7280970105f91d91159689ff726..37fd9902736ec93997f99d16ff7bbe3a7f175434 100644 --- a/python/paddle/fluid/tests/unittests/test_data_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_data_norm_op.py @@ -19,7 +19,6 @@ import numpy as np from op_test import OpTest import paddle -import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid import Program, program_guard from paddle.fluid.op import Operator @@ -518,7 +517,7 @@ class TestDataNormOpWithSlotDim(OpTest): class TestDataNormOpErrorr(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): - x2 = fluid.layers.data(name='x2', shape=[3, 4], dtype="int32") + x2 = paddle.static.data(name='x2', shape=[-1, 3, 4], dtype="int32") # self.assertRaises(TypeError, fluid.data_norm, x2) paddle.static.nn.data_norm( input=x2, param_attr={}, enable_scale_and_shift=True diff --git a/python/paddle/fluid/tests/unittests/test_dataset.py b/python/paddle/fluid/tests/unittests/test_dataset.py index fb8c9ff6e556694f4a325710d55324016d072476..f98193ea64b1b4ef909c00ae3e79284241c82ec7 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset.py +++ b/python/paddle/fluid/tests/unittests/test_dataset.py @@ -100,8 +100,8 @@ class TestDataset(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="int64", lod_level=1 ) slots_vars.append(var) @@ -192,8 +192,8 @@ class TestDataset(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] 
slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="int64", lod_level=1 ) slots_vars.append(var) @@ -257,8 +257,8 @@ class TestDataset(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="int64", lod_level=1 ) slots_vars.append(var) @@ -350,13 +350,13 @@ class TestDataset(unittest.TestCase): startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): for slot in slots[:2]: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="int64", lod_level=1 ) slots_vars.append(var) for slot in slots[2:]: - var = fluid.layers.data( - name=slot, shape=[1], dtype="float32", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="float32", lod_level=1 ) slots_vars.append(var) @@ -425,17 +425,17 @@ class TestDataset(unittest.TestCase): train_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): - var1 = fluid.layers.data( - name="slot1", shape=[1], dtype="int64", lod_level=0 + var1 = paddle.static.data( + name="slot1", shape=[-1, 1], dtype="int64", lod_level=0 ) - var2 = fluid.layers.data( - name="slot2", shape=[1], dtype="int64", lod_level=0 + var2 = paddle.static.data( + name="slot2", shape=[-1, 1], dtype="int64", lod_level=0 ) - var3 = fluid.layers.data( - name="slot3", shape=[1], dtype="float32", lod_level=0 + var3 = paddle.static.data( + name="slot3", shape=[-1, 1], dtype="float32", lod_level=0 ) - var4 = fluid.layers.data( - name="slot4", shape=[1], dtype="float32", lod_level=0 + var4 = paddle.static.data( + name="slot4", shape=[-1, 1], dtype="float32", lod_level=0 ) slots_vars = [var1, var2, var3, var4] @@ -498,8 +498,8 @@ class TestDataset(unittest.TestCase): slots = ["slot1_f", "slot2_f", "slot3_f", "slot4_f"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="float32", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="float32", lod_level=1 ) slots_vars.append(var) @@ -614,8 +614,8 @@ class TestDataset(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="int64", lod_level=1 ) slots_vars.append(var) @@ -682,8 +682,8 @@ class TestDataset(unittest.TestCase): slots = ["slot1_f", "slot2_f", "slot3_f", "slot4_f"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="float32", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="float32", lod_level=1 ) slots_vars.append(var) @@ -807,8 +807,8 @@ class TestDataset(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="int64", lod_level=1 ) slots_vars.append(var) @@ -872,8 +872,8 @@ class TestDataset(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], 
dtype="int64", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="int64", lod_level=1 ) slots_vars.append(var) @@ -944,8 +944,8 @@ class TestDatasetWithFetchHandler(unittest.TestCase): slots_vars = [] poolings = [] for slot in slots: - data = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1 + data = paddle.static.data( + name=slot, shape=[-1, 1], dtype="int64", lod_level=1 ) var = fluid.layers.cast(x=data, dtype='float32') pool = fluid.layers.sequence_pool(input=var, pool_type='AVERAGE') @@ -1117,8 +1117,8 @@ class TestDataset2(unittest.TestCase): slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="float32", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="float32", lod_level=1 ) slots_vars.append(var) fake_cost = paddle.subtract(slots_vars[0], slots_vars[-1]) @@ -1187,8 +1187,8 @@ class TestDataset2(unittest.TestCase): slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="float32", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="float32", lod_level=1 ) slots_vars.append(var) fake_cost = paddle.subtract(slots_vars[0], slots_vars[-1]) @@ -1318,8 +1318,8 @@ class TestDataset2(unittest.TestCase): slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="float32", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="float32", lod_level=1 ) slots_vars.append(var) fake_cost = paddle.subtract(slots_vars[0], slots_vars[-1]) diff --git a/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py b/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py index 63814b468245ed20807b816cec5785cc9bb60628..1a8d4de560ab79b37cc417fb83e6f980e61f77ec 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py +++ b/python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py @@ -394,12 +394,11 @@ class TestDataset(unittest.TestCase): f.write(data) slot_data = [] - label = fluid.layers.data( + label = paddle.static.data( name="click", shape=[-1, 1], dtype="int64", lod_level=0, - append_batch_size=False, ) slot_data.append(label) @@ -407,56 +406,65 @@ class TestDataset(unittest.TestCase): len_sparse_query = 19 for feat_name in range(1, len_sparse_query + 1): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='int64', lod_level=1 + paddle.static.data( + name=str(feat_name), + shape=[-1, 1], + dtype='int64', + lod_level=1, ) ) # sparse_url_feat_names for feat_name in range(len_sparse_query + 1, len_sparse_query + 5): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='int64', lod_level=1 + paddle.static.data( + name=str(feat_name), + shape=[-1, 1], + dtype='int64', + lod_level=1, ) ) # dense_feat_names for feat_name in range(len_sparse_query + 5, len_sparse_query + 16): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='float32' + paddle.static.data( + name=str(feat_name), shape=[-1, 1], dtype='float32' ) ) # context_feat_namess for feat_name in range(len_sparse_query + 16, len_sparse_query + 18): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='float32' + paddle.static.data( + name=str(feat_name), shape=[-1, 1], 
dtype='float32' ) ) # neg sparse_url_feat_names for feat_name in range(len_sparse_query + 18, len_sparse_query + 22): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='int64', lod_level=1 + paddle.static.data( + name=str(feat_name), + shape=[-1, 1], + dtype='int64', + lod_level=1, ) ) # neg dense_feat_names for feat_name in range(len_sparse_query + 22, len_sparse_query + 33): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='float32' + paddle.static.data( + name=str(feat_name), shape=[-1, 1], dtype='float32' ) ) # neg context_feat_namess for feat_name in range(len_sparse_query + 33, len_sparse_query + 35): slot_data.append( - fluid.layers.data( - name=str(feat_name), shape=[1], dtype='float32' + paddle.static.data( + name=str(feat_name), shape=[-1, 1], dtype='float32' ) ) diff --git a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py index 37443f6aa501a39adc7f8bf631cf93930e61b699..078ffb4e863b68d541022a6c2e112b711961445c 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py +++ b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py @@ -86,11 +86,11 @@ class DatasetLoaderTestBase(unittest.TestCase): main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): - image = fluid.layers.data( - name='image', shape=IMAGE_SHAPE, dtype='float32' + image = paddle.static.data( + name='image', shape=[-1] + IMAGE_SHAPE, dtype='float32' ) - label = fluid.layers.data( - name='label', shape=LABEL_SHAPE, dtype='int64' + label = paddle.static.data( + name='label', shape=[-1] + LABEL_SHAPE, dtype='int64' ) simple_fc_net_with_inputs(image, label) diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py index 6f417973ee228d654bbd71628e356a79c38f9f3f..e02282cb9bee18d2ef0db016bc39f0e77cd3e717 100644 --- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py +++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py @@ -42,10 +42,12 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): with fluid.unique_name.guard(): with fluid.program_guard(main_prog, startup_prog): - image = fluid.layers.data( - name='image', shape=[784], dtype='float32' + image = paddle.static.data( + name='image', shape=[-1, 784], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') py_reader = fluid.io.PyReader( feed_list=[image, label], capacity=4, diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py index 1a7118cfb8279c98b7d8913572263fc29c430e0e..53ef9b02ccb105c8fc63031731fcd168fde200b1 100644 --- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py +++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py @@ -50,11 +50,11 @@ class TestClass(unittest.TestCase): main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): - img = fluid.layers.data( - shape=img_shape, dtype='float32', name='image' + img = paddle.static.data( + shape=[-1] + img_shape, dtype='float32', name='image' ) - label = fluid.layers.data( - shape=label_shape, dtype='int64', name='label' + label = paddle.static.data( + 
shape=[-1] + label_shape, dtype='int64', name='label' ) feeder = fluid.DataFeeder(feed_list=[img, label], place=p) diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py index ed1f9a9aaf9d95ac60ed3e89ce7123e9b7be0da1..ecb49c3172fb92a9bccb420030a51febe44b48e7 100644 --- a/python/paddle/fluid/tests/unittests/test_desc_clone.py +++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py @@ -68,8 +68,10 @@ def cnn_model(data): def get_model(batch_size): # Input data - images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + images = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype=DTYPE + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') # Train program predict = cnn_model(images) @@ -186,7 +188,7 @@ class TestCloneWithStopGradient(unittest.TestCase): train_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): - img = fluid.layers.data(name='image', shape=[784]) + img = paddle.static.data(name='image', shape=[-1, 784]) hidden1 = paddle.static.nn.fc(x=img, size=200, activation='relu') hidden1.stop_gradient = True hidden2 = paddle.nn.functional.dropout(hidden1, p=0.5) @@ -194,7 +196,9 @@ class TestCloneWithStopGradient(unittest.TestCase): input=paddle.static.nn.fc( hidden2, size=10, activation='softmax' ), - label=fluid.layers.data(name='label', shape=[1], dtype='int64'), + label=paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' + ), reduction='none', use_softmax=False, ) @@ -214,7 +218,7 @@ class TestCloneWithStopGradientInSubBlock(unittest.TestCase): train_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): - img = fluid.layers.data(name='image', shape=[784]) + img = paddle.static.data(name='image', shape=[-1, 784]) true = paddle.ones(shape=[1], dtype="float32") hidden1 = paddle.static.nn.fc(x=img, size=200, activation='relu') hidden1.stop_gradient = True @@ -236,7 +240,9 @@ class TestCloneWithStopGradientInSubBlock(unittest.TestCase): input=paddle.static.nn.fc( hidden2, size=10, activation='softmax' ), - label=fluid.layers.data(name='label', shape=[1], dtype='int64'), + label=paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' + ), reduction='none', use_softmax=False, ) @@ -259,7 +265,7 @@ class TestCloneWithRaise(unittest.TestCase): train_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): - img = fluid.layers.data(name='image', shape=[784]) + img = paddle.static.data(name='image', shape=[-1, 784]) true = paddle.ones(shape=[1], dtype="float32") hidden1 = paddle.static.nn.fc(x=img, size=200, activation='relu') hidden1.stop_gradient = True @@ -280,7 +286,9 @@ class TestCloneWithRaise(unittest.TestCase): input=paddle.static.nn.fc( hidden2, size=10, activation='softmax' ), - label=fluid.layers.data(name='label', shape=[1], dtype='int64'), + label=paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' + ), reduction='none', use_softmax=False, ) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py index c80e1a68fddaddf489f8ee607caad17c85ca72e8..cb58970e88c739d873bde5d6d74a98dfe98bb2c8 100644 --- 
a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py @@ -45,8 +45,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): fleet.init(role_maker.PaddleCloudRoleMaker()) - x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') - y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) @@ -83,8 +83,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): fleet.init(role_maker.PaddleCloudRoleMaker()) - x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') - y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py index 080319304186f752cfb9cd98740d96caee41afac..d0521a59183a1e369855ea36004cfa9fb450df78 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py @@ -44,10 +44,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py index 725c2559db051bb9f79e51cb9123f27be060aa8e..c560dfa8dbb0b0d99bebcbb3f2098faa76e19ba6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py @@ -46,12 +46,11 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( + input_x = paddle.static.data( name="x", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) x_embedding = paddle.fluid.layers.embedding( is_distributed=False, @@ -63,7 +62,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): ), is_sparse=True, ) - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=x_embedding, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py 
b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py index c25e60793fe03639dbdcd0cdd1e124071f52b811..9eac239742743d9125f8dfca51d6d49dc8d9ca7b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py @@ -46,8 +46,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data(name="x", shape=[1], dtype='int64') - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1, 1], dtype='int64') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') emb = paddle.fluid.layers.embedding( input=input_x, size=[100, 10], is_sparse=True diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py index 3832fd5de23f72d4e83ed65ae0296db9b6a05841..c8470f64ebbc4f2a734e081604f17e994468233c 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py @@ -43,10 +43,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') @@ -76,10 +74,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py index fdaa0a69c8d20839e5004cf696a037f527e4d613..f1ff43911261e6fe2148b733fcc2951b8be49acb 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py @@ -38,8 +38,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def test_gradient_merge_optimizer(self): fleet.init(role_maker.PaddleCloudRoleMaker()) - x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') - y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) diff --git 
a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py index 376a0d087e6d72b61309d0fdf3fb797707aed978..bc17b0d67f9908185921b996d5a793680044baeb 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py @@ -65,18 +65,18 @@ class TestDistFleetHeterProgram(unittest.TestCase): return self.strategy def build_input(self): - dense_input = fluid.layers.data( - name="dense_input", shape=[10], dtype="float32" + dense_input = paddle.static.data( + name="dense_input", shape=[-1, 10], dtype="float32" ) sparse_input_ids = [ - fluid.layers.data( - name="C" + str(i), shape=[1], lod_level=1, dtype="int64" + paddle.static.data( + name="C" + str(i), shape=[-1, 1], lod_level=1, dtype="int64" ) for i in range(1, 27) ] - label = fluid.layers.data(name="label", shape=[1], dtype="float32") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="float32") inputs = [dense_input] + sparse_input_ids + [label] return inputs diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py index 472f8a6ced6fdc70df933ec086276d1a02aa3dec..b60ff0db63e7dd5c67b23fb3f18d3c2564e9d6a0 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py @@ -69,7 +69,9 @@ class TestPSMinimize(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data(name="1", shape=[1], dtype="int64", lod_level=1) + q = paddle.static.data( + name="1", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding q_emb = fluid.contrib.layers.sparse_embedding( input=q, @@ -95,9 +97,11 @@ class TestPSMinimize(unittest.TestCase): ), ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") # pt - pt = fluid.layers.data(name="2", shape=[1], dtype="int64", lod_level=1) + pt = paddle.static.data( + name="2", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( input=pt, @@ -124,7 +128,9 @@ class TestPSMinimize(unittest.TestCase): bias_attr=fluid.ParamAttr(name="__fc_b__"), ) # nt - nt = fluid.layers.data(name="3", shape=[1], dtype="int64", lod_level=1) + nt = paddle.static.data( + name="3", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( input=nt, diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py index c879875f6f771524573b208625a21595b49b3dc7..a330b45b52228c8bc5ad2ddeeee7187a1705d2d1 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py @@ -69,8 +69,8 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1 + q = paddle.static.data( + name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding q_emb = fluid.layers.embedding( @@ -99,10 +99,10 @@ class TestPSPassWithBow(unittest.TestCase): ), ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", 
lod_level=1 + pt = paddle.static.data( + name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding pt_emb = fluid.layers.embedding( @@ -132,8 +132,8 @@ class TestPSPassWithBow(unittest.TestCase): bias_attr=fluid.ParamAttr(name="__fc_b__"), ) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1 + nt = paddle.static.data( + name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding nt_emb = fluid.layers.embedding( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py index 668b64d19390cf153b8d02618817cbbf7bb9a9df..2143dc94d39e043e1a282fc679c794a231e60a01 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py @@ -69,7 +69,9 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data(name="1", shape=[1], dtype="int64", lod_level=1) + q = paddle.static.data( + name="1", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding q_emb = fluid.contrib.layers.sparse_embedding( input=q, @@ -95,9 +97,11 @@ class TestPSPassWithBow(unittest.TestCase): ), ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") # pt - pt = fluid.layers.data(name="2", shape=[1], dtype="int64", lod_level=1) + pt = paddle.static.data( + name="2", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( input=pt, @@ -124,7 +128,9 @@ class TestPSPassWithBow(unittest.TestCase): bias_attr=fluid.ParamAttr(name="__fc_b__"), ) # nt - nt = fluid.layers.data(name="3", shape=[1], dtype="int64", lod_level=1) + nt = paddle.static.data( + name="3", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( input=nt, @@ -204,8 +210,8 @@ class TestPSPassWithBow(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="int64", lod_level=1 ) slots_vars.append(var) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py index 5aa14fba6a5dab4266d669f1b45a8348d63e7ec3..bee3cd9eb2239e48d6df210eb1511445d25fbf1c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py @@ -72,7 +72,9 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data(name="1", shape=[1], dtype="int64", lod_level=1) + q = paddle.static.data( + name="1", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding q_emb = fluid.contrib.layers.sparse_embedding( input=q, @@ -98,9 +100,11 @@ class TestPSPassWithBow(unittest.TestCase): ), ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") # pt - pt = fluid.layers.data(name="2", shape=[1], dtype="int64", lod_level=1) + pt = paddle.static.data( + name="2", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( input=pt, @@ -127,7 +131,9 @@ class TestPSPassWithBow(unittest.TestCase): bias_attr=fluid.ParamAttr(name="__fc_b__"), ) # nt - nt = 
fluid.layers.data(name="3", shape=[1], dtype="int64", lod_level=1) + nt = paddle.static.data( + name="3", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( input=nt, diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py index 8ecb4e2a2ad8f61f6a988e3cd516bfdd79b8f4c1..58248d325b1452e0525f68f20276017e7ad7e814 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py @@ -73,8 +73,8 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1 + q = paddle.static.data( + name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding q_emb = fluid.contrib.layers.sparse_embedding( @@ -101,10 +101,10 @@ class TestPSPassWithBow(unittest.TestCase): ), ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1 + pt = paddle.static.data( + name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( @@ -132,8 +132,8 @@ class TestPSPassWithBow(unittest.TestCase): bias_attr=fluid.ParamAttr(name="__fc_b__"), ) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1 + nt = paddle.static.data( + name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index d8dfcda35e235dddfd64855af3029ab0eb39a299..e207fb859de54a49a39977e6fdecad9938282a60 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -72,8 +72,8 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1 + q = paddle.static.data( + name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding q_emb = fluid.contrib.layers.sparse_embedding( @@ -101,10 +101,10 @@ class TestPSPassWithBow(unittest.TestCase): ), ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1 + pt = paddle.static.data( + name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( @@ -132,8 +132,8 @@ class TestPSPassWithBow(unittest.TestCase): bias_attr=fluid.ParamAttr(name="__fc_b__"), ) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1 + nt = paddle.static.data( + name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py index c4517cc28471f2f6fd89bfea78a9fd8ad0e79751..4093fc34cc998417fd0a187a43f2f42a84777791 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py +++ 
b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py @@ -69,8 +69,8 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = False # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1 + q = paddle.static.data( + name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding q_emb = fluid.layers.embedding( @@ -99,10 +99,10 @@ class TestPSPassWithBow(unittest.TestCase): ), ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1 + pt = paddle.static.data( + name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding pt_emb = fluid.layers.embedding( @@ -132,8 +132,8 @@ class TestPSPassWithBow(unittest.TestCase): bias_attr=fluid.ParamAttr(name="__fc_b__"), ) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1 + nt = paddle.static.data( + name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding nt_emb = fluid.layers.embedding( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index 93c2d48f89777a83ff7453396fe6cbfb9b538809..025b3e90b37d46a0f42eccafd8d6700a616d788c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -69,8 +69,8 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1 + q = paddle.static.data( + name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding q_emb = fluid.contrib.layers.sparse_embedding( @@ -97,10 +97,10 @@ class TestPSPassWithBow(unittest.TestCase): ), ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1 + pt = paddle.static.data( + name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( @@ -128,8 +128,8 @@ class TestPSPassWithBow(unittest.TestCase): bias_attr=fluid.ParamAttr(name="__fc_b__"), ) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1 + nt = paddle.static.data( + name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index ca69a778aac0395add998e54a55081eb32d04354..51bf54b3241b488549012ef82626c5fc707e4ba9 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -69,8 +69,8 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1 + q = paddle.static.data( + name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding q_emb = fluid.layers.embedding( @@ -99,10 +99,10 @@ class TestPSPassWithBow(unittest.TestCase): ), ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], 
dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1 + pt = paddle.static.data( + name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding pt_emb = fluid.layers.embedding( @@ -132,8 +132,8 @@ class TestPSPassWithBow(unittest.TestCase): bias_attr=fluid.ParamAttr(name="__fc_b__"), ) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1 + nt = paddle.static.data( + name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding nt_emb = fluid.layers.embedding( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py index ab6bb7198c655c7eac2e71db1d13d13c7ff59fed..165a8b6240aafac399dee3ad1803bc75b82b385c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py @@ -69,8 +69,8 @@ class TestPSPassWithBow(unittest.TestCase): is_sparse = True # query - q = fluid.layers.data( - name="query_ids", shape=[1], dtype="int64", lod_level=1 + q = paddle.static.data( + name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding q_emb = fluid.contrib.layers.sparse_embedding( @@ -97,10 +97,10 @@ class TestPSPassWithBow(unittest.TestCase): ), ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1 + pt = paddle.static.data( + name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( @@ -128,8 +128,8 @@ class TestPSPassWithBow(unittest.TestCase): bias_attr=fluid.ParamAttr(name="__fc_b__"), ) # nt - nt = fluid.layers.data( - name="neg_title_ids", shape=[1], dtype="int64", lod_level=1 + nt = paddle.static.data( + name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py index 57f4615f7c9dbaecbe826f53ff23fca30c5f684d..517232fa54eb84647b01cd5d0e42822d5d24768a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py @@ -189,26 +189,23 @@ class TestDistMnistAsync2x2WithGauss(TestFleetBase): """ dnn_input_dim, lr_input_dim = 10, 10 - dnn_data = fluid.layers.data( + dnn_data = paddle.static.data( name="dnn_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - lr_data = fluid.layers.data( + lr_data = paddle.static.data( name="lr_data", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) - label = fluid.layers.data( + label = paddle.static.data( name="click", shape=[-1, 1], dtype="int64", lod_level=0, - append_batch_size=False, ) datas = [dnn_data, lr_data, label] diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py index 72068108d2206478d19f06516db4bcec1714a7e3..ba6e67a035095e08c0430a04c4e6e149bdc42511 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py @@ -67,7 +67,9 @@ class TestSPMT(unittest.TestCase): is_sparse = 
True # query - q = fluid.layers.data(name="1", shape=[1], dtype="int64", lod_level=1) + q = paddle.static.data( + name="1", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding q_emb = fluid.contrib.layers.sparse_embedding( input=q, @@ -93,9 +95,11 @@ class TestSPMT(unittest.TestCase): ), ) # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") # pt - pt = fluid.layers.data(name="2", shape=[1], dtype="int64", lod_level=1) + pt = paddle.static.data( + name="2", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding pt_emb = fluid.contrib.layers.sparse_embedding( input=pt, @@ -122,7 +126,9 @@ class TestSPMT(unittest.TestCase): bias_attr=fluid.ParamAttr(name="__fc_b__"), ) # nt - nt = fluid.layers.data(name="3", shape=[1], dtype="int64", lod_level=1) + nt = paddle.static.data( + name="3", shape=[-1, 1], dtype="int64", lod_level=1 + ) # embedding nt_emb = fluid.contrib.layers.sparse_embedding( input=nt, diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py index b13e2b8171c8cf92d9e11e12bfbbd085cd7cf9c6..e954b56d7f972e6b75d7e9c307cb0484e25a384d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py @@ -39,8 +39,8 @@ class TestDistStrategyTrainerDescConfig(unittest.TestCase): fleet.init(role_maker.PaddleCloudRoleMaker()) - x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') - y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py index 0749139be840f3dc9151bfa3cdb7775c3589d72b..08d9c52c685e4a6accebc1481f8834d0208f386c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py @@ -55,7 +55,7 @@ class FleetCollectiveTest(unittest.TestCase): # Operator "gen_nccl_id" has not been registered return - data = fluid.layers.data(name='X', shape=[1], dtype='float32') + data = paddle.static.data(name='X', shape=[-1, 1], dtype='float32') hidden = paddle.static.nn.fc(x=data, size=10) loss = paddle.mean(hidden) diff --git a/python/paddle/fluid/tests/unittests/test_dist_train.py b/python/paddle/fluid/tests/unittests/test_dist_train.py index 5fa4f87f95a3240dc92260629c7ce660cad6cd42..828b07baf7bbc9220a40a508bd5d8eb5eec7d613 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_train.py @@ -23,7 +23,6 @@ from dist_test_utils import remove_ps_flag import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers import paddle.fluid.layers.ops as ops from paddle.fluid import core from paddle.fluid.layers.io import ListenAndServ, Recv, Send @@ -83,11 +82,10 @@ class TestSendOp(unittest.TestCase): dtype="float32", shape=[32, 32], ) - x = layers.data( + x = paddle.static.data( shape=[32, 32], dtype='float32', name="X", - append_batch_size=False, ) fluid.initializer.Constant(value=1.0)(x, main.global_block()) ops._scale(x=x, scale=10.0, 
out=out_var) @@ -108,12 +106,7 @@ class TestSendOp(unittest.TestCase): }, ) - x = layers.data( - shape=[32, 32], - dtype='float32', - name='X', - append_batch_size=False, - ) + x = paddle.static.data(shape=[32, 32], dtype='float32', name='X') x.persistable = True fluid.initializer.Constant(value=2.3)(x, main.global_block()) @@ -141,12 +134,7 @@ class TestSendOp(unittest.TestCase): def run_local(self, place): main = fluid.Program() with fluid.program_guard(main): - x = layers.data( - shape=[32, 32], - dtype='float32', - name='X', - append_batch_size=False, - ) + x = paddle.static.data(shape=[32, 32], dtype='float32', name='X') fluid.initializer.Constant(value=2.3)(x, main.global_block()) o = paddle.scale(x=x, scale=10.0) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 71fb3f96d4cef8e03f19606322b109fe12813c9f..e9b8f773c743bc8cef82694f2e1a682df3509620 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -38,14 +38,14 @@ class TranspilerTest(unittest.TestCase): self.transpiler = None def net_conf(self): - x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1000], dtype='float32') y_predict = paddle.static.nn.fc( x, size=1000, weight_attr=fluid.ParamAttr(name='fc_w'), bias_attr=fluid.ParamAttr(name='fc_b'), ) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) @@ -292,14 +292,14 @@ class TestNoSliceVar(TranspilerTest): class TestLRDecay(TranspilerTest): def net_conf(self): - x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1000], dtype='float32') y_predict = paddle.static.nn.fc( x, size=1000, weight_attr=fluid.ParamAttr(name='fc_w'), bias_attr=fluid.ParamAttr(name='fc_b'), ) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD( @@ -338,14 +338,14 @@ class TestFakeInit(TranspilerTest): def net_conf(self): dict_size, embedding_size, neg_num = 10000, 8, 5 - input_word = fluid.layers.data( - name="input_word", shape=[1], dtype='int64', lod_level=1 + input_word = paddle.static.data( + name="input_word", shape=[-1, 1], dtype='int64', lod_level=1 ) - true_word = fluid.layers.data( - name='true_label', shape=[1], dtype='int64', lod_level=1 + true_word = paddle.static.data( + name='true_label', shape=[-1, 1], dtype='int64', lod_level=1 ) - neg_word = fluid.layers.data( - name="neg_label", shape=[1], dtype='int64', lod_level=1 + neg_word = paddle.static.data( + name="neg_label", shape=[-1, 1], dtype='int64', lod_level=1 ) inputs = [input_word, true_word, neg_word] @@ -458,14 +458,14 @@ class TestFakeInit(TranspilerTest): class TestDecayedAdagrad(TranspilerTest): def net_conf(self): - x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1000], dtype='float32') y_predict = paddle.static.nn.fc( x, size=1000, weight_attr=fluid.ParamAttr(name='fc_w'), bias_attr=fluid.ParamAttr(name='fc_b'), ) - y 
= fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) opt = fluid.optimizer.DecayedAdagrad(learning_rate=0.1) @@ -478,14 +478,14 @@ class TestDecayedAdagrad(TranspilerTest): class TestFtrl(TranspilerTest): def net_conf(self): - x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1000], dtype='float32') y_predict = paddle.static.nn.fc( x, size=1000, weight_attr=fluid.ParamAttr(name='fc_w'), bias_attr=fluid.ParamAttr(name='fc_b'), ) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) opt = fluid.optimizer.Ftrl(learning_rate=0.1) @@ -498,14 +498,14 @@ class TestFtrl(TranspilerTest): class TestLRDecayConditional(TranspilerTest): def net_conf(self): - x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1000], dtype='float32') y_predict = paddle.static.nn.fc( x, size=1000, weight_attr=fluid.ParamAttr(name='fc_w'), bias_attr=fluid.ParamAttr(name='fc_b'), ) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD( @@ -561,7 +561,7 @@ class TestLRDecayConditional(TranspilerTest): class TestL2Decay(TranspilerTest): def net_conf(self): - x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1000], dtype='float32') y_predict = paddle.static.nn.fc( x, size=1000, @@ -570,7 +570,7 @@ class TestL2Decay(TranspilerTest): ), bias_attr=fluid.ParamAttr(name='fc_b'), ) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) @@ -599,14 +599,14 @@ class TestL2Decay(TranspilerTest): class TestL2DecayWithPiecewise(TranspilerTest): def net_conf(self): - x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1000], dtype='float32') y_predict = paddle.static.nn.fc( x, size=1000, weight_attr=fluid.ParamAttr(name='fc_w'), bias_attr=fluid.ParamAttr(name='fc_b'), ) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) base_lr = 1.0 @@ -673,7 +673,7 @@ class TestL2DecayWithPiecewise(TranspilerTest): class TestEmptyPserverOptimizeBlocks(TranspilerTest): def net_conf(self): - x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1000], dtype='float32') # only one parameter y_predict = paddle.static.nn.fc( x, @@ -681,7 +681,7 @@ class TestEmptyPserverOptimizeBlocks(TranspilerTest): weight_attr=fluid.ParamAttr(name='fc_w'), bias_attr=False, ) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = 
paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=1.0) @@ -715,14 +715,14 @@ class TestDistLookupTableBase(TranspilerTest): pool = fluid.layers.sequence_pool(input=emb, pool_type='average') return pool - title_ids = fluid.layers.data( - name='title_ids', shape=[1], dtype='int64', lod_level=1 + title_ids = paddle.static.data( + name='title_ids', shape=[-1, 1], dtype='int64', lod_level=1 ) - brand_ids = fluid.layers.data( - name='brand_ids', shape=[1], dtype='int64', lod_level=1 + brand_ids = paddle.static.data( + name='brand_ids', shape=[-1, 1], dtype='int64', lod_level=1 ) - profile_ids = fluid.layers.data( - name='brand_ids', shape=[1], dtype='int64', lod_level=1 + profile_ids = paddle.static.data( + name='brand_ids', shape=[-1, 1], dtype='int64', lod_level=1 ) title_emb = emb_pool(title_ids, self.lookup_table_name, is_distributed) brand_emb = emb_pool(brand_ids, self.lookup_table_name, is_distributed) @@ -737,7 +737,7 @@ class TestDistLookupTableBase(TranspilerTest): bias_attr=fluid.ParamAttr(name='fc_b'), ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') cost = paddle.nn.functional.cross_entropy( input=predict, label=label, reduction='none', use_softmax=False ) @@ -1116,14 +1116,14 @@ class TestDistArgsInProgram(TestDistLookupTableBase): class TestRMSPropOptimizer(TranspilerTest): def net_conf(self): - x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1000], dtype='float32') y_predict = paddle.static.nn.fc( x, size=1000, weight_attr=fluid.ParamAttr(name='fc_w'), bias_attr=fluid.ParamAttr(name='fc_b'), ) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) optimizer = fluid.optimizer.RMSProp(learning_rate=0.1) @@ -1148,14 +1148,14 @@ class TestRMSPropOptimizer(TranspilerTest): class TestLoadSliceVar(TranspilerTest): def net_conf(self): - x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1000], dtype='float32') y_predict = paddle.static.nn.fc( x, size=1000, weight_attr=fluid.ParamAttr(name='fc_w'), bias_attr=fluid.ParamAttr(name='fc_b'), ) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) optimizer = fluid.optimizer.RMSProp(learning_rate=0.1) @@ -1315,8 +1315,10 @@ class TestRemoteNce(TestDistLookupTableBase): sampler = "uniform" nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32') - input = fluid.layers.data(name="input", shape=[10], dtype="float32") - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + input = paddle.static.data( + name="input", shape=[-1, 10], dtype="float32" + ) + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") w_param = ( fluid.default_main_program() @@ -1388,13 +1390,13 @@ class TestRemoteHsigmoid(TestDistLookupTableBase): num_total_classes = 3 - input = fluid.layers.data(name="input", shape=[1], dtype="float32") - label = fluid.layers.data(name="label", shape=[1], dtype="int64") - path_table = fluid.layers.data( - name='path_table', shape=[3], dtype='int64' 
+ input = paddle.static.data(name="input", shape=[-1, 1], dtype="float32") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + path_table = paddle.static.data( + name='path_table', shape=[-1, 3], dtype='int64' ) - path_code = fluid.layers.data( - name='path_code', shape=[3], dtype='int64' + path_code = paddle.static.data( + name='path_code', shape=[-1, 3], dtype='int64' ) w_param = ( fluid.default_main_program() diff --git a/python/paddle/fluid/tests/unittests/test_dist_tree_index.py b/python/paddle/fluid/tests/unittests/test_dist_tree_index.py index d5abb734733456b72e27d32dfa31bbef42a96900..b336de40cb02296de94e88c650daccef277a6b79 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_tree_index.py +++ b/python/paddle/fluid/tests/unittests/test_dist_tree_index.py @@ -17,7 +17,6 @@ import tempfile import unittest import paddle -import paddle.fluid as fluid from paddle.dataset.common import download from paddle.distributed.fleet.dataset import TreeIndex @@ -25,19 +24,19 @@ paddle.enable_static() def create_feeds(): - user_input = fluid.layers.data( - name="item_id", shape=[1], dtype="int64", lod_level=1 + user_input = paddle.static.data( + name="item_id", shape=[-1, 1], dtype="int64", lod_level=1 ) - item = fluid.layers.data( - name="unit_id", shape=[1], dtype="int64", lod_level=1 + item = paddle.static.data( + name="unit_id", shape=[-1, 1], dtype="int64", lod_level=1 ) - label = fluid.layers.data( - name="label", shape=[1], dtype="int64", lod_level=1 + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64", lod_level=1 ) - labels = fluid.layers.data( - name="labels", shape=[1], dtype="int64", lod_level=1 + labels = paddle.static.data( + name="labels", shape=[-1, 1], dtype="int64", lod_level=1 ) feed_list = [user_input, item, label, labels] @@ -140,7 +139,7 @@ class TestIndexSampler(unittest.TestCase): slots = ["slot1", "slot2", "slot3"] slots_vars = [] for slot in slots: - var = fluid.layers.data(name=slot, shape=[1], dtype="int64") + var = paddle.static.data(name=slot, shape=[-1, 1], dtype="int64") slots_vars.append(var) dataset = paddle.distributed.InMemoryDataset() diff --git a/python/paddle/fluid/tests/unittests/test_dot_op.py b/python/paddle/fluid/tests/unittests/test_dot_op.py index 55460c2f14b9d6dc10a802adedf8b70d4b8dcbb4..d32057bfb0d2dd893722422775b64bbe6670b50d 100644 --- a/python/paddle/fluid/tests/unittests/test_dot_op.py +++ b/python/paddle/fluid/tests/unittests/test_dot_op.py @@ -116,16 +116,22 @@ class TestDotOpError(unittest.TestCase): # the input dtype of elementwise_mul must be float16 or float32 or float64 or int32 or int64 # float16 only can be set on GPU place - x1 = fluid.layers.data(name='x1', shape=[120], dtype="uint8") - y1 = fluid.layers.data(name='y1', shape=[120], dtype="uint8") + x1 = paddle.static.data(name='x1', shape=[-1, 120], dtype="uint8") + y1 = paddle.static.data(name='y1', shape=[-1, 120], dtype="uint8") self.assertRaises(Exception, paddle.dot, x1, y1) - x2 = fluid.layers.data(name='x2', shape=[2, 3], dtype="float32") - y2 = fluid.layers.data(name='y2', shape=[2, 3], dtype="float32") + x2 = paddle.static.data( + name='x2', shape=[-1, 2, 3], dtype="float32" + ) + y2 = paddle.static.data( + name='y2', shape=[-1, 2, 3], dtype="float32" + ) self.assertRaises(Exception, paddle.dot, x2, y2) - x3 = fluid.layers.data(name='x3', shape=[3], dtype="float32") - y3 = fluid.layers.data(name='y3', shape=[2, 3], dtype="float32") + x3 = paddle.static.data(name='x3', shape=[-1, 3], dtype="float32") + y3 = paddle.static.data( + 
name='y3', shape=[-1, 2, 3], dtype="float32" + ) self.assertRaises(Exception, paddle.dot, x2, y3) diff --git a/python/paddle/fluid/tests/unittests/test_downpoursgd.py b/python/paddle/fluid/tests/unittests/test_downpoursgd.py index 29e022c4ff3c6f9afc494b9c145f08b246b443aa..652660b8e00d00b585fbee121d971312dbb43a29 100644 --- a/python/paddle/fluid/tests/unittests/test_downpoursgd.py +++ b/python/paddle/fluid/tests/unittests/test_downpoursgd.py @@ -52,12 +52,12 @@ class TestListenAndServOp(unittest.TestCase): cache_path ) os.system(cmd) - x = fluid.layers.data(name='x', shape=[1], dtype='int64') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='int64') x_emb = fluid.layers.embedding( input=x, size=[1, 2], is_distributed=True ) y_predict = paddle.static.nn.fc(x=x_emb, size=1) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost( input=y_predict, label=y ) @@ -116,12 +116,12 @@ class TestListenAndServOp(unittest.TestCase): cache_path ) os.system(cmd) - x = fluid.layers.data(name='x', shape=[1], dtype='int64') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='int64') x_emb = fluid.layers.embedding( input=x, size=[1, 2], is_distributed=True ) y_predict = paddle.static.nn.fc(x=x_emb, size=1) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost( input=y_predict, label=y ) @@ -178,12 +178,12 @@ class TestListenAndServOp(unittest.TestCase): cache_path ) os.system(cmd) - x = fluid.layers.data(name='x', shape=[1], dtype='int64') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='int64') x_emb = fluid.layers.embedding( input=x, size=[1, 2], is_distributed=True ) y_predict = paddle.static.nn.fc(x=x_emb, size=1) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost( input=y_predict, label=y ) diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py index f68b8b0561f2b4d4a7df11e4909c57b6968cf77d..9a48b877f5f22d5029cdf504b6b1077165c5946d 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py @@ -349,8 +349,8 @@ class TestDropoutOpError(unittest.TestCase): def test_dtype(): # the input dtype of dropout must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="int32" + x2 = paddle.static.data( + name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" ) paddle.nn.functional.dropout(x2, p=0.5) diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py index e79773e8d41f46805d7b29bc09b5a8320644e42b..8e07d427e4ef158c8d7bac1604c1200bee90a6c5 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py @@ -164,10 +164,12 @@ class TestDygraphMultiForward(unittest.TestCase): paddle.dataset.mnist.train(), batch_size=128, drop_last=True ) - img = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype='float32' + img = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype='float32' + ) + label = paddle.static.data( + name='label', 
shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') cost = mnist(img) loss = paddle.nn.functional.cross_entropy( cost, label, reduction='none', use_softmax=False diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py index 43bd95270839e2a5e064229d0fba197a76c4bec5..a12a17636bfc14dd2e83f365704421112d309967 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py @@ -32,8 +32,8 @@ fluid.core._set_eager_deletion_mode(0.0, 1.0, True) def simple_fc_net(): - image = fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + image = paddle.static.data(name='image', shape=[-1, 784], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') hidden = image for _ in range(4): hidden = paddle.static.nn.fc( diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py index f9294f152dcdedf7439c7b2220cc1c02a4c55a94..44153b6e2fe4eb10ba439ffbfe2f3600e247633b 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py @@ -41,11 +41,11 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2): reader = fake_imdb_reader(word_dict_size, batch_size * 40) train_reader = paddle.batch(reader, batch_size=batch_size) - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1 + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 ) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") cost = network(data, label, word_dict_size) cost.persistable = True diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index 80bc977f091bac9e57c5e4774e5236a96115c22c..5657eb174c30331838ece0977d6da423524a4323 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -333,30 +333,22 @@ def lm_model( return real_res, last_hidden, last_cell batch_size_each = batch_size - x = layers.data( - name="x", - shape=[batch_size_each, num_steps, 1], - dtype='int64', - append_batch_size=False, + x = paddle.static.data( + name="x", shape=[batch_size_each, num_steps, 1], dtype='int64' ) - y = layers.data( - name="y", - shape=[batch_size_each * num_steps, 1], - dtype='int64', - append_batch_size=False, + y = paddle.static.data( + name="y", shape=[batch_size_each * num_steps, 1], dtype='int64' ) - init_hidden = layers.data( + init_hidden = paddle.static.data( name="init_hidden", shape=[num_layers, batch_size_each, hidden_size], dtype='float32', - append_batch_size=False, ) - init_cell = layers.data( + init_cell = paddle.static.data( name="init_cell", shape=[num_layers, batch_size_each, hidden_size], dtype='float32', - append_batch_size=False, ) init_cell.persistable = True diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py 
b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py index 10f5def7248483bc019209d64f3a804bc9712d83..bd4e08819570f7838eeb51fa4861087a8d134d87 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py @@ -139,15 +139,14 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase): self.output = paddle.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', - append_batch_size=False, ) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot' + h_boot = paddle.static.data( + shape=[-1, self.input_dim], dtype='float32', name='h_boot' ) h_boot.stop_gradient = False @@ -292,15 +291,14 @@ class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1): self.output = paddle.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', - append_batch_size=False, ) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot' + h_boot = paddle.static.data( + shape=[-1, self.input_dim], dtype='float32', name='h_boot' ) h_boot.stop_gradient = False @@ -402,25 +400,22 @@ class EagerDeletionRecurrentOpMultipleMemoryTest(EagerDeletionRecurrentOpTest1): self.output = paddle.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', - append_batch_size=False, ) x.stop_gradient = False - h_boot1 = layers.data( + h_boot1 = paddle.static.data( shape=[self.batch_size, self.input_dim], dtype='float32', name='h_boot1', - append_batch_size=False, ) h_boot1.stop_gradient = False - h_boot2 = layers.data( + h_boot2 = paddle.static.data( shape=[self.batch_size, self.input_dim], dtype='float32', name='h_boot2', - append_batch_size=False, ) h_boot2.stop_gradient = False @@ -490,11 +485,10 @@ class EagerDeletionRecurrentOpNoMemBootTest(EagerDeletionRecurrentOpTest1): self.output = paddle.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', - append_batch_size=False, ) x.stop_gradient = False @@ -570,11 +564,10 @@ class EagerDeletionTwoRecurrentOpsTest(EagerDeletionRecurrentOpTest1): self.output = paddle.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', - append_batch_size=False, ) x.stop_gradient = False @@ -673,15 +666,14 @@ class EagerDeletionFarwardOnlyRnnAndBackwardRnnTest( self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) with fluid.program_guard(self.main_program, self.startup_program): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', - append_batch_size=False, ) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot' + h_boot = paddle.static.data( + shape=[-1, self.input_dim], dtype='float32', name='h_boot' ) h_boot.stop_gradient = False diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py 
b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py index f2b5f667fde9f40586dbb84fab79f413e365dcc3..097b5de0b90eb2b6fb54ae1da0ca58ee36624f39 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_while_op.py @@ -66,15 +66,9 @@ class TestEagerDeletionWhileOpBase(unittest.TestCase): else 1 ) - d0 = layers.data( - "d0", shape=[10], append_batch_size=False, dtype='float32' - ) - d1 = layers.data( - "d1", shape=[10], append_batch_size=False, dtype='float32' - ) - d2 = layers.data( - "d2", shape=[10], append_batch_size=False, dtype='float32' - ) + d0 = paddle.static.data("d0", shape=[-1, 10], dtype='float32') + d1 = paddle.static.data("d1", shape=[-1, 10], dtype='float32') + d2 = paddle.static.data("d2", shape=[-1, 10], dtype='float32') i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py index 4be4ddd2287e9bf405eec99d6355c30d9adc2739..2c5da64817d5f2fc7a54c4e121a41562ba8c4664 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py @@ -21,7 +21,6 @@ from decorator_helper import prog_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers class TestElementwiseMulDoubleGradCheck(unittest.TestCase): @@ -32,8 +31,8 @@ class TestElementwiseMulDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, dtype) - y = layers.data('y', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape, dtype) x.persistable = True y.persistable = True out = paddle.multiply(x, y) @@ -61,8 +60,8 @@ class TestElementwiseMulBroadcastDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, dtype) - y = layers.data('y', shape[:-1], False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape[:-1], dtype) x.persistable = True y.persistable = True out = paddle.tensor.math._multiply_with_axis(x, y, axis=0) @@ -90,8 +89,8 @@ class TestElementwiseAddDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, dtype) - y = layers.data('y', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape, dtype) x.persistable = True y.persistable = True out = paddle.add(x, y) @@ -119,8 +118,8 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, dtype) - y = layers.data('y', shape[:-1], False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape[:-1], dtype) x.persistable = True y.persistable = True out = paddle.tensor.math._add_with_axis(x, y, axis=0) @@ -151,8 +150,8 @@ class TestElementwiseSubDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, dtype) - y = layers.data('y', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape, dtype) x.persistable = True y.persistable = True out = paddle.subtract(x, y) @@ -187,8 +186,8 @@ class TestElementwiseSubBroadcastDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, 
dtype) - y = layers.data('y', shape[:-1], False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape[:-1], dtype) x.persistable = True y.persistable = True out = paddle.tensor.math._subtract_with_axis(x, y, axis=0) @@ -219,8 +218,8 @@ class TestElementwiseDivDoubleGradCheck(unittest.TestCase): eps = 0.0001 dtype = np.float64 - x = layers.data('x', shape, False, dtype) - y = layers.data('y', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape, dtype) x.persistable = True y.persistable = True out = paddle.tensor.math._divide_with_axis(x, y, axis=0) @@ -257,8 +256,8 @@ class TestElementwiseDivBroadcastDoubleGradCheck(unittest.TestCase): eps = 0.0001 dtype = np.float64 - x = layers.data('x', shape, False, dtype) - y = layers.data('y', shape[1:-1], False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape[1:-1], dtype) x.persistable = True y.persistable = True out = paddle.tensor.math._divide_with_axis(x, y, axis=1) @@ -287,8 +286,8 @@ class TestElementwiseAddTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, dtype) - y = layers.data('y', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape, dtype) x.persistable = True y.persistable = True out = paddle.add(x, y) @@ -316,8 +315,8 @@ class TestElementwiseAddBroadcastTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, dtype) - y = layers.data('y', shape[:-1], False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape[:-1], dtype) x.persistable = True y.persistable = True out = paddle.tensor.math._add_with_axis(x, y, axis=0) @@ -348,8 +347,8 @@ class TestElementwiseMulTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, dtype) - y = layers.data('y', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape, dtype) x.persistable = True y.persistable = True out = paddle.multiply(x, y) @@ -384,8 +383,8 @@ class TestElementwiseMulBroadcastTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', shape, False, dtype) - y = layers.data('y', shape[:-1], False, dtype) + x = paddle.static.data('x', shape, dtype) + y = paddle.static.data('y', shape[:-1], dtype) x.persistable = True y.persistable = True out = paddle.tensor.math._add_with_axis(x, y, axis=0) diff --git a/python/paddle/fluid/tests/unittests/test_entry_attr.py b/python/paddle/fluid/tests/unittests/test_entry_attr.py index 07d2ab7fa8f0ebfe2f6e262df340ca8d76d2d2da..6cc7606a7a9cecb39b9124f9cbf8af859d84d32b 100644 --- a/python/paddle/fluid/tests/unittests/test_entry_attr.py +++ b/python/paddle/fluid/tests/unittests/test_entry_attr.py @@ -67,12 +67,8 @@ class EntryAttrChecks(unittest.TestCase): with fluid.scope_guard(scope): with fluid.program_guard(prog): - input = fluid.layers.data( - name="dnn_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False, + input = paddle.static.data( + name="dnn_data", shape=[-1, 1], dtype="int64", lod_level=1 ) prob = ProbabilityEntry(0.5) emb = paddle.static.nn.sparse_embedding( diff --git a/python/paddle/fluid/tests/unittests/test_entry_attr2.py b/python/paddle/fluid/tests/unittests/test_entry_attr2.py index 5db31f906fa8e306803b0f921f43a188ea291536..d06e248f901df1f8ad9493e92c97338840e769f1 100644 --- 
a/python/paddle/fluid/tests/unittests/test_entry_attr2.py +++ b/python/paddle/fluid/tests/unittests/test_entry_attr2.py @@ -28,12 +28,8 @@ class EntryAttrChecks(unittest.TestCase): with fluid.scope_guard(scope): with fluid.program_guard(prog): - input = fluid.layers.data( - name="dnn_data", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False, + input = paddle.static.data( + name="dnn_data", shape=[-1, 1], dtype="int64", lod_level=1 ) emb = fluid.layers.embedding( input=input, diff --git a/python/paddle/fluid/tests/unittests/test_exception.py b/python/paddle/fluid/tests/unittests/test_exception.py index aca120b48fc7ebe712f184e77660a0a81a20d9c8..57faeaacc0708100be443e0b7e9b7f32b9848508 100644 --- a/python/paddle/fluid/tests/unittests/test_exception.py +++ b/python/paddle/fluid/tests/unittests/test_exception.py @@ -40,8 +40,8 @@ class TestExceptionNoCStack(unittest.TestCase): fluid.set_flags({'FLAGS_call_stack_level': 1}) def test_exception_in_static_mode(self): - x = fluid.layers.data(name='X', shape=[-1, 13], dtype='float32') - y = fluid.layers.data(name='Y', shape=[-1, 1], dtype='float32') + x = paddle.static.data(name='X', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='Y', shape=[-1, 1], dtype='float32') predict = paddle.static.nn.fc(x, size=1) loss = paddle.nn.functional.square_error_cost(input=predict, label=y) avg_loss = paddle.mean(loss) diff --git a/python/paddle/fluid/tests/unittests/test_executor_and_mul.py b/python/paddle/fluid/tests/unittests/test_executor_and_mul.py index d1c822b5823aa6765b4dc447d68ddd05fab9d48a..f9bb4286ad38147342b0d2159f6c24b00ea82cc0 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_and_mul.py +++ b/python/paddle/fluid/tests/unittests/test_executor_and_mul.py @@ -18,20 +18,19 @@ import numpy as np import paddle from paddle.fluid.executor import Executor -from paddle.fluid.layers import data, zeros +from paddle.fluid.layers import zeros +from paddle.static import data from paddle.tensor import array_write class TestExecutor(unittest.TestCase): def test_mul(self): i = zeros(shape=[1], dtype='int64') - a = data(name='a', shape=[784], dtype='float32') + a = data(name='a', shape=[-1, 784], dtype='float32') array = array_write(x=a, i=i) i = paddle.increment(i) - b = data( - name='b', shape=[784, 100], dtype='float32', append_batch_size=False - ) + b = data(name='b', shape=[784, 100], dtype='float32') array_write(x=b, i=i, array=array) i = paddle.increment(i) diff --git a/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py b/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py index 623f2065bc0e29dcf587bb5b988d97315bdf5ec9..fe9d09cb54d424e49e059f70a9122d7cd2a35343 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py +++ b/python/paddle/fluid/tests/unittests/test_executor_and_use_program_cache.py @@ -26,13 +26,10 @@ class TestExecutor(unittest.TestCase): main_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): - a = fluid.layers.data(name='a', shape=[784], dtype='float32') - b = fluid.layers.data( - name='b', - shape=[784, 100], - dtype='float32', - append_batch_size=False, - ) + a = paddle.static.data(name='a', shape=[-1, 784], dtype='float32') + b = paddle.static.data(name='b', shape=[784, 100], dtype='float32') + a.desc.set_need_check_feed(False) + b.desc.set_need_check_feed(False) output = paddle.matmul(x=a, y=b) # Compute with numpy diff --git 
a/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py index fccfab3e4c3fd3c644efc8c82fa616fc100673a8..8fb2110bfe211f7e60ff5ca4a3c3224e5a3441c7 100755 --- a/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_as_v2_op.py @@ -100,10 +100,10 @@ class TestExpandAsOpRank5(TestExpandAsBasic): class TestExpandAsV2Error(unittest.TestCase): def test_errors(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - x1 = fluid.layers.data(name='x1', shape=[4], dtype="uint8") - x2 = fluid.layers.data(name='x2', shape=[4], dtype="int32") + x1 = paddle.static.data(name='x1', shape=[-1, 4], dtype="uint8") + x2 = paddle.static.data(name='x2', shape=[-1, 4], dtype="int32") self.assertRaises(TypeError, paddle.tensor.expand_as, x1, x2) - x3 = fluid.layers.data(name='x3', shape=[4], dtype="bool") + x3 = paddle.static.data(name='x3', shape=[-1, 4], dtype="bool") x3.stop_gradient = False self.assertRaises(ValueError, paddle.tensor.expand_as, x3, x2) @@ -113,14 +113,11 @@ class TestExpandAsV2API(unittest.TestCase): def test_api(self): input1 = np.random.random([12, 14]).astype("float32") input2 = np.random.random([2, 12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32" - ) + x = paddle.static.data(name='x', shape=[12, 14], dtype="float32") - y = fluid.layers.data( + y = paddle.static.data( name='target_tensor', shape=[2, 12, 14], - append_batch_size=False, dtype="float32", ) diff --git a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py index 8839def6923639d8b49344f3a2f357d0179ddaf2..0a5eda417e95cd82b28f720d33729d9eb3231b43 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py @@ -21,7 +21,6 @@ from op_test import OpTest import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import Program, core, program_guard @@ -194,9 +193,9 @@ class TestExpandV2Error(unittest.TestCase): ) shape = [2, 2] self.assertRaises(TypeError, paddle.tensor.expand, x1, shape) - x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") + x2 = paddle.static.data(name='x2', shape=[-1, 4], dtype="uint8") self.assertRaises(TypeError, paddle.tensor.expand, x2, shape) - x3 = fluid.layers.data(name='x3', shape=[4], dtype="bool") + x3 = paddle.static.data(name='x3', shape=[-1, 4], dtype="bool") x3.stop_gradient = False self.assertRaises(ValueError, paddle.tensor.expand, x3, shape) @@ -205,15 +204,12 @@ class TestExpandV2Error(unittest.TestCase): class TestExpandV2API(unittest.TestCase): def test_api(self): input = np.random.random([12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32" - ) + x = paddle.static.data(name='x', shape=[12, 14], dtype="float32") positive_2 = fluid.layers.fill_constant([1], "int32", 12) - expand_shape = fluid.layers.data( + expand_shape = paddle.static.data( name="expand_shape", shape=[2], - append_batch_size=False, dtype="int32", ) @@ -273,7 +269,7 @@ class TestExpandDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 3], False, dtype) + data = paddle.static.data('data', [2, 3], dtype) data.persistable = True out = paddle.expand(data, [2, 3]) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -304,7 
+300,7 @@ class TestExpandTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 3], False, dtype) + data = paddle.static.data('data', [2, 3], dtype) data.persistable = True out = paddle.expand(data, [2, 3]) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_fc_op.py b/python/paddle/fluid/tests/unittests/test_fc_op.py index b07fc1d0001a5ab2e1b63cbce0a9f6f2c0660ad0..03ef6c3db6c190216c7206f7e7fabe69eb2ffbf9 100644 --- a/python/paddle/fluid/tests/unittests/test_fc_op.py +++ b/python/paddle/fluid/tests/unittests/test_fc_op.py @@ -146,10 +146,9 @@ class TestFcOp_NumFlattenDims_NegOne(unittest.TestCase): with program_guard(main_program, startup_program): input = np.random.random([2, 2, 25]).astype("float32") - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[2, 2, 25], - append_batch_size=False, dtype="float32", ) @@ -191,13 +190,13 @@ class TestFCOpError(unittest.TestCase): def test_type(): # dtype must be float32 or float64 - x2 = fluid.layers.data(name='x2', shape=[4], dtype='int32') + x2 = paddle.static.data(name='x2', shape=[-1, 4], dtype='int32') paddle.static.nn.fc(x=x2, size=1) self.assertRaises(TypeError, test_type) # The input dtype of fc can be float16 in GPU, test for warning - x3 = fluid.layers.data(name='x3', shape=[4], dtype='float16') + x3 = paddle.static.data(name='x3', shape=[-1, 4], dtype='float16') paddle.static.nn.fc(x=x3, size=1) diff --git a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py index b642e2524ee292f87ac24f6b9396b91b0e4f04c1..0e5330014d3f5f3891a8db32cc67e85a4e0448c3 100644 --- a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py +++ b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py @@ -30,7 +30,7 @@ np.random.seed(123) class TestFeedData(unittest.TestCase): ''' Test paddle.fluid.data feeds with different shape and types. - Note: paddle.fluid.data is not paddle.fluid.layers.data. + Note: paddle.fluid.data is not paddle.static.data. 
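The pattern applied throughout this patch: fluid.layers.data prepends a batch dimension implicitly (append_batch_size defaults to True), while paddle.static.data expects the full shape, so shape=[1] becomes shape=[-1, 1] and call sites that passed append_batch_size=False simply drop that argument. A minimal sketch of the equivalence, assuming both APIs are still importable at this commit (variable names here are placeholders, not part of the patch):

    # Illustrative sketch only; "label_old"/"label_new" are placeholder names.
    # Both declarations produce a (-1, 1) int64 variable in a static-graph program.
    import paddle
    import paddle.fluid as fluid

    paddle.enable_static()

    # Legacy API: a leading batch dimension is added automatically.
    label_old = fluid.layers.data(name="label_old", shape=[1], dtype="int64")

    # Replacement API: the batch dimension is written out explicitly as -1.
    label_new = paddle.static.data(name="label_new", shape=[-1, 1], dtype="int64")

    assert tuple(label_old.shape) == tuple(label_new.shape) == (-1, 1)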
''' def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py b/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py index d93ee36b6e2409d793e5a35a0d281150628146ee..f7313f93e0d14e01d29693b458b7f43b518e52cc 100644 --- a/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py +++ b/python/paddle/fluid/tests/unittests/test_fetch_unmerged.py @@ -57,11 +57,11 @@ class TestFetchUnmerged(unittest.TestCase): def build_program(self, main, startup, is_test): with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32' + img = paddle.static.data( + name='image', shape=[-1, 1, 28, 28], dtype='float32' ) - label = fluid.layers.data( - name='label', shape=[1], dtype='int64' + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) loss, prediction = self.conv_net(img, label) if not is_test: diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py index 5e1af99259db1acd868da01614399f286101db3f..38ef0379747dbaa5aa2709efbc3033b9b1966b55 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py @@ -379,7 +379,7 @@ class TestFillConstantOpError(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): # for ci coverage - x1 = fluid.layers.data(name='x1', shape=[1], dtype="int16") + x1 = paddle.static.data(name='x1', shape=[-1, 1], dtype="int16") self.assertRaises( TypeError, fluid.layers.fill_constant, @@ -399,7 +399,7 @@ class TestFillConstantOpError(unittest.TestCase): # The argument dtype of fill_constant_op must be one of bool, float16, # float32, float64, uint8, int16, int32 or int64 - x2 = fluid.layers.data(name='x2', shape=[1], dtype="int32") + x2 = paddle.static.data(name='x2', shape=[-1, 1], dtype="int32") self.assertRaises( TypeError, diff --git a/python/paddle/fluid/tests/unittests/test_fleet.py b/python/paddle/fluid/tests/unittests/test_fleet.py index bc5a083d17d4c1e92647ed4f051fd36b70c0e21e..736f68be35806fd219d23ee852f0fdac039f9856 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet.py +++ b/python/paddle/fluid/tests/unittests/test_fleet.py @@ -52,12 +52,11 @@ class TestFleet1(unittest.TestCase): startup_program = fluid.Program() scope = fluid.Scope() with fluid.program_guard(train_program, startup_program): - show = fluid.layers.data( + show = paddle.static.data( name="show", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) emb = fluid.layers.embedding( input=show, @@ -71,12 +70,11 @@ class TestFleet1(unittest.TestCase): input=bow, epsilon=1e-4, name="norm" ) fc = paddle.static.nn.fc(x=bow, size=1, activation=None) - label = fluid.layers.data( + label = paddle.static.data( name="click", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) label_cast = fluid.layers.cast(label, dtype='float32') cost = paddle.nn.functional.log_loss(fc, label_cast) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_api_input.py b/python/paddle/fluid/tests/unittests/test_fleet_api_input.py index 12acfdf76321d3ab95b29c9845ae9d0f1052d2c6..b57a30d75266d3676718b93099dbffb6b61c423a 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_api_input.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_api_input.py @@ -61,7 +61,7 @@ class FleetTest(unittest.TestCase): self.assertRaises(Exception, fleet.split_files, "files") 
self.assertRaises(Exception, fleet.init, "pserver") - data = fluid.layers.data(name='X', shape=[1], dtype='float32') + data = paddle.static.data(name='X', shape=[-1, 1], dtype='float32') hidden = paddle.static.nn.fc(x=data, size=10) loss = paddle.mean(hidden) adam = fluid.optimizer.Adam() @@ -176,7 +176,7 @@ class TranspilerOptimizerTest(unittest.TestCase): transpiler = TranspilerOptimizer(fluid.optimizer.Adam(0.001)) self.assertRaises(Exception, transpiler.minimize, loss=[]) - data = fluid.layers.data(name='X', shape=[1], dtype='float32') + data = paddle.static.data(name='X', shape=[-1, 1], dtype='float32') hidden = paddle.static.nn.fc(x=data, size=10) loss = paddle.mean(hidden) self.assertRaises( diff --git a/python/paddle/fluid/tests/unittests/test_fleet_auto.py b/python/paddle/fluid/tests/unittests/test_fleet_auto.py index 1e30f703ff893b64a15af32316f6a4caaacc9b29..b9b18f04fac6e536e61002bdffa1345d8bba3dea 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_auto.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_auto.py @@ -32,10 +32,8 @@ class TestDistributedStrategyAuto(unittest.TestCase): def test_distributed_strategy_auto(self): fleet.init(is_collective=True) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_2.py b/python/paddle/fluid/tests/unittests/test_fleet_base_2.py index 4a56f8913a434a76080a0f61b633293362a687b4..ee5a84d1e41e810e2dc6a47758fd672e91223eb0 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base_2.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_2.py @@ -39,13 +39,11 @@ class TestFleetBase(unittest.TestCase): os.environ["TRAINING_ROLE"] = "TRAINER" os.environ["PADDLE_TRAINER_ID"] = "1" - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_slot = paddle.static.data( + name="slot", shape=[-1, 1], dtype='int64' ) - input_slot = paddle.fluid.layers.data( - name="slot", shape=[1], dtype='int64' - ) - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') emb = paddle.fluid.layers.embedding( input=input_slot, size=[10, 9], is_sparse=True diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_3.py b/python/paddle/fluid/tests/unittests/test_fleet_base_3.py index 30cdf5fbed4b7267c2d404e8d7010afff40a2f60..e24beee28e12b85e16abf1e5738f5e9b934835d5 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base_3.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_3.py @@ -32,10 +32,8 @@ class TestFleetBase_1(unittest.TestCase): ] = "127.0.0.1:36001,127.0.0.2:36001" def test_collective_minimize(self): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, 
size=64, activation='tanh') @@ -63,10 +61,8 @@ class TestFleetBase(unittest.TestCase): ] = "127.0.0.1:36001,127.0.0.2:36001" def test_fleet_get_applied_optimizer(self): - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32' - ) - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor.py b/python/paddle/fluid/tests/unittests/test_fleet_executor.py index 400009f820de3c59cafb87582ca43c77dc7ae176..d798ffb016c536aaf763136b8a5e6dec560f162a 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor.py @@ -45,12 +45,14 @@ class TestFleetExecutor(unittest.TestCase): exe = paddle.static.Executor(place) empty_program = paddle.static.Program() with fluid.program_guard(empty_program, empty_program): - x = fluid.layers.data( - name='x', shape=x_data.shape, dtype=x_data.dtype + x = paddle.static.data( + name='x', shape=[-1] + list(x_data.shape), dtype=x_data.dtype ) - y = fluid.layers.data( - name='y', shape=y_data.shape, dtype=y_data.dtype + x.desc.set_need_check_feed(False) + y = paddle.static.data( + name='y', shape=[-1] + list(y_data.shape), dtype=y_data.dtype ) + y.desc.set_need_check_feed(False) z = x + y a = 2 * x + 3 * y loss = paddle.mean(a) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_cond_interceptor.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_cond_interceptor.py new file mode 100644 index 0000000000000000000000000000000000000000..d3a57898a0dce8f4309ce0833262c88085ad5e71 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_cond_interceptor.py @@ -0,0 +1,171 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import paddle +import paddle.fluid.core as core +from paddle.distributed.fleet.fleet_executor_utils import TaskNode + +paddle.enable_static() + + +def cond(i, ten): + return i < ten + + +def body(i, ten): + i = i + 1 + return [i, ten] + + +class TestFleetExecutor(unittest.TestCase): + def test_cond_interceptor(self): + main_program = paddle.static.Program() + startup_program = paddle.static.Program() + with paddle.static.program_guard(main_program, startup_program): + i = paddle.full( + shape=[1], fill_value=0, dtype='int64' + ) # loop counter + ten = paddle.full( + shape=[1], fill_value=10, dtype='int64' + ) # loop length + i, ten = paddle.static.nn.while_loop(cond, body, [i, ten]) + + program_a = paddle.static.Program() + program_b = paddle.static.Program() + + for var_name in main_program.block(0).vars: + if var_name != "_generated_var_0": + var = main_program.block(0).var(var_name) + program_a.block(0).create_var( + name=var_name, + shape=var.shape, + dtype=var.dtype, + stop_gradient=var.stop_gradient, + ) + program_b.block(0).create_var( + name=var_name, + shape=var.shape, + dtype=var.dtype, + stop_gradient=var.stop_gradient, + ) + + for op in main_program.block(0).ops: + if op.type != "while": + program_a.block(0).append_op( + type=op.type, + inputs=op.desc.inputs(), + outputs=op.desc.outputs(), + attrs=op.all_attrs(), + ) + + for var_name in main_program.block(1).vars: + var = main_program.block(1).var(var_name) + program_b.block(0).create_var( + name=var_name, + shape=var.shape, + dtype=var.dtype, + stop_gradient=var.stop_gradient, + ) + + for op in main_program.block(1).ops: + program_b.block(0).append_op( + type=op.type, + inputs=op.desc.inputs(), + outputs=op.desc.outputs(), + attrs=op.all_attrs(), + ) + + cond_var_name = "tmp_0" + num_micro_batches = 3 + + task_a = TaskNode( + 0, + num_micro_batches, + node_type="Compute", + task_id=0, + program=program_a, + lazy_initialize=True, + ) + task_b = TaskNode( + 0, + num_micro_batches, + node_type="Cond", + task_id=1, + program=paddle.static.Program(), + cond_var_name=cond_var_name, + lazy_initialize=True, + ) + task_c = TaskNode( + 0, + num_micro_batches, + node_type="Compute", + task_id=2, + program=program_b, + lazy_initialize=True, + ) + task_d = TaskNode( + 0, + num_micro_batches, + node_type="Compute", + task_id=3, + program=paddle.static.Program(), + lazy_initialize=True, + ) + task_e = TaskNode( + 0, + num_micro_batches, + node_type="Compute", + task_id=4, + program=paddle.static.Program(), + lazy_initialize=True, + ) + + task_a.add_downstream_task(task_b.task_id(), 2) + task_b.add_upstream_task(task_a.task_id(), 2) + task_b.add_downstream_task(task_c.task_id(), 100) + task_c.add_upstream_task(task_b.task_id(), 100) + task_c.add_downstream_task(task_d.task_id(), 2) + task_d.add_upstream_task(task_c.task_id(), 2) + task_d.add_downstream_task(task_b.task_id(), 100, core.DependType.LOOP) + task_b.add_upstream_task(task_d.task_id(), 100, core.DependType.LOOP) + task_b.add_downstream_task( + task_e.task_id(), 100, core.DependType.STOP_LOOP + ) + task_e.add_upstream_task( + task_b.task_id(), 100, core.DependType.STOP_LOOP + ) + + main_program._pipeline_opt = { + "fleet_opt": { + 'tasks': [task_a, task_b, task_c, task_d, task_e], + 'task_id_to_rank': { + task_a.task_id(): 0, + task_b.task_id(): 0, + task_c.task_id(): 0, + task_d.task_id(): 0, + task_e.task_id(): 0, + }, + 'num_micro_batches': num_micro_batches, + }, + } + + place = paddle.fluid.CUDAPlace(0) + exe = paddle.fluid.Executor(place) + 
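
The new test above maps a static while_loop onto five fleet-executor tasks: a Compute task (task_a) running the ops that produce the condition variable, a Cond task (task_b) that evaluates it, Compute tasks (task_c, task_d) on the loop-back path, and a Compute task (task_e) reached once the loop stops. Edges tagged core.DependType.LOOP carry the loop-back path and core.DependType.STOP_LOOP the exit path. A condensed sketch of just the wiring, with buffer sizes copied from the test (an illustration of the dependency pattern, not a replacement for the full setup above):

import paddle.fluid.core as core

def wire_cond_loop(task_a, task_b, task_c, task_d, task_e):
    # normal forward edges into and out of the condition node
    task_a.add_downstream_task(task_b.task_id(), 2)
    task_b.add_upstream_task(task_a.task_id(), 2)
    task_b.add_downstream_task(task_c.task_id(), 100)
    task_c.add_upstream_task(task_b.task_id(), 100)
    task_c.add_downstream_task(task_d.task_id(), 2)
    task_d.add_upstream_task(task_c.task_id(), 2)
    # loop-back edges: re-enter the condition after each body iteration
    task_d.add_downstream_task(task_b.task_id(), 100, core.DependType.LOOP)
    task_b.add_upstream_task(task_d.task_id(), 100, core.DependType.LOOP)
    # exit edges: taken once the condition variable becomes false
    task_b.add_downstream_task(task_e.task_id(), 100, core.DependType.STOP_LOOP)
    task_e.add_upstream_task(task_b.task_id(), 100, core.DependType.STOP_LOOP)

The hunks that follow adjust the task-node API to match: add_downstream_task/add_upstream_task gain the DependType argument, and the max_slot_times argument is dropped from the Python TaskNode wrapper.
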
exe.run(main_program) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py index b2a1c488dcec66016c00d3d7e8b1de9b22b91d57..53bd4a20bcd12304b61272cb4e80def3c4324136 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_multi_devices.py @@ -27,7 +27,9 @@ class TestFleetExecutor(unittest.TestCase): exe = paddle.static.Executor(place) empty_program = paddle.static.Program() with fluid.program_guard(empty_program, empty_program): - x = fluid.layers.data(name='x', shape=[1], dtype=paddle.float32) + x = paddle.static.data( + name='x', shape=[-1, 1], dtype=paddle.float32 + ) empty_program._pipeline_opt = { "fleet_opt": fleet_opt, "section_program": empty_program, diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py index d24348b7d77b58234f2dbc1ef9d7ae7d563a19d3..726687d87df9188cdab1274addddef0aad29dcda 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py @@ -45,12 +45,14 @@ class TestFleetExecutor(unittest.TestCase): exe = paddle.static.Executor(place) empty_program = paddle.static.Program() with fluid.program_guard(empty_program, empty_program): - x = fluid.layers.data( - name='x', shape=x_data.shape, dtype=x_data.dtype + x = paddle.static.data( + name='x', shape=[-1] + list(x_data.shape), dtype=x_data.dtype ) - y = fluid.layers.data( - name='y', shape=y_data.shape, dtype=y_data.dtype + x.desc.set_need_check_feed(False) + y = paddle.static.data( + name='y', shape=[-1] + list(y_data.shape), dtype=y_data.dtype ) + y.desc.set_need_check_feed(False) z = x + y a = 2 * x + 3 * y loss = paddle.mean(a) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py index 6d74fdc075cdf0f3a0282dd74c7a024028b2d1bc..e61c30f6c57bad4d12247deea18ef9ddca182beb 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_task_node.py @@ -24,16 +24,22 @@ paddle.enable_static() class TestFleetExecutorTaskNode(unittest.TestCase): def test_task_node(self): program = paddle.static.Program() - task_node_0 = core.TaskNode(program.desc, 0, 1, 1) + task_node_0 = core.TaskNode(program.desc, 0, 0, 1) task_node_1 = core.TaskNode(program.desc, 0, 1, 1) - task_node_2 = core.TaskNode(program.desc, 0, 1, 1) + task_node_2 = core.TaskNode(program.desc, 0, 2, 1) self.assertEqual(task_node_0.task_id(), 0) self.assertEqual(task_node_1.task_id(), 1) self.assertEqual(task_node_2.task_id(), 2) self.assertTrue( - task_node_0.add_downstream_task(task_node_1.task_id(), 1) + task_node_0.add_downstream_task( + task_node_1.task_id(), 1, core.DependType.NORMAL + ) + ) + self.assertTrue( + task_node_1.add_upstream_task( + task_node_0.task_id(), 1, core.DependType.NORMAL + ) ) - self.assertTrue(task_node_1.add_upstream_task(task_node_0.task_id(), 1)) def test_lazy_task_node(self): program = paddle.static.Program() @@ -41,7 +47,6 @@ class TestFleetExecutorTaskNode(unittest.TestCase): program=program, rank=0, max_run_times=1, - max_slot_times=1, lazy_initialize=True, ) task_node = task.task_node() diff 
--git a/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py index 46eb0dc6f0bf8428ca0b5b6989fb6444ca5b2495..e60d0f1ff61db92a01d4e9e14aa7b1d4b33f3b77 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py @@ -28,12 +28,14 @@ class TestFleetExecutor(unittest.TestCase): exe = paddle.static.Executor(place) empty_program = paddle.static.Program() with fluid.program_guard(empty_program, empty_program): - x = fluid.layers.data( - name='x', shape=x_data.shape, dtype=x_data.dtype + x = paddle.static.data( + name='x', shape=[-1] + list(x_data.shape), dtype=x_data.dtype ) - y = fluid.layers.data( - name='y', shape=y_data.shape, dtype=y_data.dtype + x.desc.set_need_check_feed(False) + y = paddle.static.data( + name='y', shape=[-1] + list(y_data.shape), dtype=y_data.dtype ) + y.desc.set_need_check_feed(False) z = x + y a = 2 * x + 3 * y loss = paddle.mean(a) @@ -57,7 +59,6 @@ class TestFleetExecutor(unittest.TestCase): rank=0, node_type="Compute", max_run_times=1, - max_slot_times=1, lazy_initialize=True, ) empty_program._pipeline_opt = { diff --git a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py index c651a456fd5112e802dfee765542a4d04737d844..09a9db8ccd5739604a1dd38a8fbad627fd6a6013 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_nocvm_1.py @@ -52,12 +52,11 @@ class TestFleet1(unittest.TestCase): startup_program = fluid.Program() scope = fluid.Scope() with fluid.program_guard(train_program, startup_program): - show = fluid.layers.data( + show = paddle.static.data( name="show", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) emb = fluid.layers.embedding( input=show, @@ -67,12 +66,11 @@ class TestFleet1(unittest.TestCase): param_attr=fluid.ParamAttr(name="embedding"), ) fc = paddle.static.nn.fc(x=emb, size=1, activation=None) - label = fluid.layers.data( + label = paddle.static.data( name="click", shape=[-1, 1], dtype="int64", lod_level=1, - append_batch_size=False, ) label_cast = fluid.layers.cast(label, dtype='float32') cost = paddle.nn.functional.log_loss(fc, label_cast) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py index 4c3c321ac0ad3c103ef0ecb828cd4eb9483f84f5..893956587eb81fc63a30502f20cdb17adb4d2dda 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker.py @@ -82,20 +82,12 @@ class TestCloudRoleMaker(unittest.TestCase): startup_program = fluid.Program() scope = fluid.Scope() with fluid.program_guard(train_program, startup_program): - show = fluid.layers.data( - name="show", - shape=[-1, 1], - dtype="float32", - lod_level=1, - append_batch_size=False, + show = paddle.static.data( + name="show", shape=[-1, 1], dtype="float32", lod_level=1 ) fc = paddle.static.nn.fc(x=show, size=1, activation=None) - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False, + label = paddle.static.data( + name="click", shape=[-1, 1], dtype="int64", lod_level=1 ) label_cast = fluid.layers.cast(label, dtype='float32') cost = paddle.nn.functional.log_loss(fc, label_cast) diff --git 
a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py index 50a6013e2d2172b33356e6f7dc2bb0800cac2cb9..ece84ed1d5bc785c7159c4207134cabff6f22c95 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py @@ -63,20 +63,12 @@ class TestCloudRoleMaker2(unittest.TestCase): startup_program = fluid.Program() scope = fluid.Scope() with fluid.program_guard(train_program, startup_program): - show = fluid.layers.data( - name="show", - shape=[-1, 1], - dtype="float32", - lod_level=1, - append_batch_size=False, + show = paddle.static.data( + name="show", shape=[-1, 1], dtype="float32", lod_level=1 ) fc = paddle.static.nn.fc(x=show, size=1, activation=None) - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False, + label = paddle.static.data( + name="click", shape=[-1, 1], dtype="int64", lod_level=1 ) label_cast = fluid.layers.cast(label, dtype='float32') cost = paddle.nn.functional.log_loss(fc, label_cast) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py index 27cb171c0dd6d285a4526224ae2b2210c8ff27be..94e293978b59756c516f0d6a667b3f45324b199a 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_3.py @@ -56,20 +56,12 @@ class TestCloudRoleMaker(unittest.TestCase): startup_program = fluid.Program() scope = fluid.Scope() with fluid.program_guard(train_program, startup_program): - show = fluid.layers.data( - name="show", - shape=[-1, 1], - dtype="float32", - lod_level=1, - append_batch_size=False, + show = paddle.static.data( + name="show", shape=[-1, 1], dtype="float32", lod_level=1 ) fc = paddle.static.nn.fc(x=show, size=1, activation=None) - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False, + label = paddle.static.data( + name="click", shape=[-1, 1], dtype="int64", lod_level=1 ) label_cast = fluid.layers.cast(label, dtype='float32') cost = paddle.nn.functional.log_loss(fc, label_cast) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py b/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py index 178fcfa230e449c4d0a1e8c96f2a3401a728dbdc..3eb24d9b4015b6b630585b54325b1c790148ac4f 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_unitaccessor.py @@ -52,12 +52,8 @@ class TestFleet1(unittest.TestCase): startup_program = fluid.Program() scope = fluid.Scope() with fluid.program_guard(train_program, startup_program): - show = fluid.layers.data( - name="show", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False, + show = paddle.static.data( + name="show", shape=[-1, 1], dtype="int64", lod_level=1 ) emb = fluid.layers.embedding( input=show, @@ -67,12 +63,8 @@ class TestFleet1(unittest.TestCase): param_attr=fluid.ParamAttr(name="embedding"), ) fc = paddle.static.nn.fc(x=emb, size=1, activation=None) - label = fluid.layers.data( - name="click", - shape=[-1, 1], - dtype="int64", - lod_level=1, - append_batch_size=False, + label = paddle.static.data( + name="click", shape=[-1, 1], dtype="int64", lod_level=1 ) label_cast = fluid.layers.cast(label, dtype='float32') cost = paddle.nn.functional.log_loss(fc, label_cast) diff --git 
a/python/paddle/fluid/tests/unittests/test_flip.py b/python/paddle/fluid/tests/unittests/test_flip.py index f7ebd3d1d54f9534459b8c713a5b4231bbc7ba72..1807199821eb7459c3573877928f8f44172ba129 100644 --- a/python/paddle/fluid/tests/unittests/test_flip.py +++ b/python/paddle/fluid/tests/unittests/test_flip.py @@ -22,7 +22,6 @@ from op_test import OpTest import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers class TestFlipOp_API(unittest.TestCase): @@ -145,7 +144,7 @@ class TestFlipDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [3, 2, 2], False, dtype) + data = paddle.static.data('data', [3, 2, 2], dtype) data.persistable = True out = paddle.flip(data, [0, 1]) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -177,7 +176,7 @@ class TestFlipTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [3, 2, 2], False, dtype) + data = paddle.static.data('data', [3, 2, 2], dtype) data.persistable = True out = paddle.flip(data, [0, 1]) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py index 9e067454482d79b8292a35fd79d357c4d0ab0f78..4d7fb60d4660e16947bb62734d66cd48d36691f0 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py @@ -21,8 +21,10 @@ import paddle.fluid as fluid class TestFuseBatchNormActPass(unittest.TestCase): def build_program(self, main_program, startup_program, use_cuda, seed=1): with fluid.program_guard(main_program, startup_program): - x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') - y = fluid.layers.data(name="y", shape=[1], dtype='int64') + x = paddle.static.data( + name='x', shape=[-1, 1, 28, 28], dtype='float32' + ) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') hidden1 = paddle.static.nn.conv2d( input=x, filter_size=3, diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py index a8d0f89c86649d3e823b7c1f18d348b2fa9d105b..d981ccbe14ccbafd3bc7ec1f9a54316965c37908 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py @@ -64,8 +64,10 @@ class TestFusedBnAddActAPI(unittest.TestCase): self, main_program, startup_program, use_cuda, seed=1 ): with fluid.program_guard(main_program, startup_program): - x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') - y = fluid.layers.data(name="y", shape=[1], dtype='int64') + x = paddle.static.data( + name='x', shape=[-1, 1, 28, 28], dtype='float32' + ) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') conv1_1 = paddle.static.nn.conv2d( input=x, filter_size=3, @@ -123,8 +125,10 @@ class TestFusedBnAddActAPI(unittest.TestCase): self, main_program, startup_program, use_cuda, seed=1 ): with fluid.program_guard(main_program, startup_program): - x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') - y = fluid.layers.data(name="y", shape=[1], dtype='int64') + x = paddle.static.data( + name='x', shape=[-1, 1, 28, 28], dtype='float32' + ) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') conv1_1 = paddle.static.nn.conv2d( input=x, filter_size=3, diff --git 
a/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py index 388025fb2adcb86fa4dc043843a05dca7c726065..c4f576ce2036c1e2b44ff2b4a3f66fdc19909c21 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_relu_depthwise_conv_pass.py @@ -55,8 +55,8 @@ def sep_conv(input, channel, stride, filter, dilation=1, act=None): def simple_depthwise_net(use_feed): assert use_feed - img = fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data(name='image', shape=[-1, 784], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') hidden = paddle.reshape(img, (-1, 1, 28, 28)) for _ in range(4): hidden = sep_conv(hidden, channel=200, stride=2, filter=5) diff --git a/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py b/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py index ff2e2f73286994e1bbfc5920f64a79a9b6be3c8f..cce05d8747cdf87128725379c79266e296ece4e4 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py @@ -114,9 +114,7 @@ class TestFusedAttentionPass(unittest.TestCase): hidden_size = 768 num_heads = 12 - x_data = np.random.rand(batch_size, seq_len, hidden_size).astype( - 'float32' - ) + x_data = np.random.rand(batch_size, seq_len, seq_len).astype('float32') mask_data = np.random.rand( batch_size, num_heads, seq_len, seq_len ).astype('float32') @@ -127,7 +125,7 @@ class TestFusedAttentionPass(unittest.TestCase): with paddle.static.program_guard(main_prog, startup_prog): data = paddle.static.data( name="x", - shape=[-1, seq_len, hidden_size], + shape=[-1, seq_len, seq_len], dtype='float32', ) if self.add_mask: @@ -138,6 +136,7 @@ class TestFusedAttentionPass(unittest.TestCase): ) else: attn_mask = None + data_linear = paddle.nn.Linear(seq_len, hidden_size) multi_head_attn = MultiHeadAttention( hidden_size, num_heads, @@ -146,7 +145,9 @@ class TestFusedAttentionPass(unittest.TestCase): post_ln=self.post_ln, attn_dropout=self.attn_dropout, ) - out = multi_head_attn(data, attn_mask) + + attn_input = data_linear(data) + out = multi_head_attn(attn_input, attn_mask) loss = paddle.mean(out) sgd_optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.001) @@ -156,7 +157,13 @@ class TestFusedAttentionPass(unittest.TestCase): pass_manager.apply([main_prog], [startup_prog]) ops = main_prog.global_block().ops - assert ops[0].type == 'reduce_mean' + assert ops[2].type == 'reduce_mean' + assert ops[4].type == 'reduce_mean_grad' + # two ops for linear, one op for reduce mean + # one fill constant + # one op for reduce mean grad, two ops for linear bwd + # the eighth op should be the optimizer + assert ops[7].type == 'sgd' if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py b/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py index 3e3f0ca05a62f281c1f56f85ed2e044580a73f14..460bf43bbe091be66153209f10cb39010541e8db 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_emb_seq_pool_op.py @@ -18,6 +18,7 @@ import unittest import numpy as np from op_test import OpTest, skip_check_grad_ci +import paddle import paddle.version as ver @@ -108,8 +109,8 @@ 
class TestFusedEmbeddingSeqPoolApi(unittest.TestCase): import paddle.fluid as fluid dict_size = 20 - data_t = fluid.layers.data( - name='word', shape=[1], dtype='int64', lod_level=1 + data_t = paddle.static.data( + name='word', shape=[-1, 1], dtype='int64', lod_level=1 ) padding_idx = np.random.randint(1, 10) out = fluid.contrib.fused_embedding_seq_pool( diff --git a/python/paddle/fluid/tests/unittests/test_gather_nd_op.py b/python/paddle/fluid/tests/unittests/test_gather_nd_op.py index 6a3f5cf8556809bbecbda4cc18bd50d56e49ccb4..6c2b6d15f6f639b00b914b1bc087cc88bc750b25 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_nd_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_nd_op.py @@ -160,20 +160,28 @@ class TestGatherNdOpWithHighRankDiff(OpTest): # Test Python API class TestGatherNdOpAPI(unittest.TestCase): def test_case1(self): - x1 = fluid.layers.data( - name='x1', shape=[30, 40, 50, 60], dtype='float32' + x1 = paddle.static.data( + name='x1', shape=[-1, 30, 40, 50, 60], dtype='float32' + ) + index1 = paddle.static.data( + name='index1', shape=[-1, 2, 4], dtype='int32' ) - index1 = fluid.layers.data(name='index1', shape=[2, 4], dtype='int32') output1 = paddle.gather_nd(x1, index1) def test_case2(self): - x2 = fluid.layers.data(name='x2', shape=[30, 40, 50], dtype='float32') - index2 = fluid.layers.data(name='index2', shape=[2, 2], dtype='int64') + x2 = paddle.static.data( + name='x2', shape=[-1, 30, 40, 50], dtype='float32' + ) + index2 = paddle.static.data( + name='index2', shape=[-1, 2, 2], dtype='int64' + ) output2 = paddle.gather_nd(x2, index2) def test_case3(self): - x3 = fluid.layers.data(name='x3', shape=[3, 4, 5], dtype='float32') - index3 = fluid.layers.data(name='index3', shape=[2, 1], dtype='int32') + x3 = paddle.static.data(name='x3', shape=[-1, 3, 4, 5], dtype='float32') + index3 = paddle.static.data( + name='index3', shape=[-1, 2, 1], dtype='int32' + ) output3 = paddle.gather_nd(x3, index3, name="gather_nd_layer") @@ -182,11 +190,11 @@ class TestGatherNdOpRaise(unittest.TestCase): def test_check_raise(self): def check_raise_is_test(): try: - x = fluid.layers.data( - name='x', shape=[3, 4, 5], dtype='float32' + x = paddle.static.data( + name='x', shape=[-1, 3, 4, 5], dtype='float32' ) - index = fluid.layers.data( - name='index', shape=[2, 10], dtype='int32' + index = paddle.static.data( + name='index', shape=[-1, 2, 10], dtype='int32' ) output = paddle.gather_nd(x, index) except Exception as e: @@ -231,13 +239,15 @@ class TestGatherNdError(unittest.TestCase): class TestGatherNdAPI2(unittest.TestCase): def test_static(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.layers.data('data1', shape=[-1, 2], dtype='float64') - index = fluid.layers.data('index', shape=[-1, 1], dtype='int32') + data1 = paddle.static.data('data1', shape=[-1, 2], dtype='float64') + data1.desc.set_need_check_feed(False) + index = paddle.static.data('index', shape=[-1, 1], dtype='int32') + index.desc.set_need_check_feed(False) out = paddle.gather_nd(data1, index) place = fluid.CPUPlace() exe = fluid.Executor(place) input = np.array([[1, 2], [3, 4], [5, 6]]) - index_1 = np.array([[1]]) + index_1 = np.array([[1]]).astype('int32') (result,) = exe.run( feed={"data1": input, "index": index_1}, fetch_list=[out] ) diff --git a/python/paddle/fluid/tests/unittests/test_gather_op.py b/python/paddle/fluid/tests/unittests/test_gather_op.py index 44ab250c7652c223affcf5f3f15190d73256a2e6..2f2538769a3b38e5a750d0dabfa035c13fa89a8a 100644 --- 
a/python/paddle/fluid/tests/unittests/test_gather_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_op.py @@ -225,8 +225,10 @@ class TestGatherOp4(TestGatherOp1): class API_TestGather(unittest.TestCase): def test_out1(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.layers.data('data1', shape=[-1, 2], dtype='float64') - index = fluid.layers.data('index', shape=[-1, 1], dtype='int32') + data1 = paddle.static.data('data1', shape=[-1, 2], dtype='float64') + data1.desc.set_need_check_feed(False) + index = paddle.static.data('index', shape=[-1, 1], dtype='int32') + index.desc.set_need_check_feed(False) out = paddle.gather(data1, index) place = fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_gather_tree_op.py b/python/paddle/fluid/tests/unittests/test_gather_tree_op.py index bcd319ed2d24a22ef86169b457429decdaabf286..79580339a0f9620b1b702dc2fa271ff5e9e0add9 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_tree_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_tree_op.py @@ -18,7 +18,6 @@ import numpy as np from op_test import OpTest import paddle -import paddle.fluid as fluid from paddle.fluid.framework import Program, program_guard @@ -58,14 +57,11 @@ class TestGatherTreeOp(OpTest): class TestGatherTreeOpAPI(unittest.TestCase): def test_case(self): paddle.enable_static() - ids = fluid.layers.data( - name='ids', shape=[5, 2, 2], dtype='int64', append_batch_size=False - ) - parents = fluid.layers.data( + ids = paddle.static.data(name='ids', shape=[5, 2, 2], dtype='int64') + parents = paddle.static.data( name='parents', shape=[5, 2, 2], dtype='int64', - append_batch_size=False, ) final_sequences = paddle.nn.functional.gather_tree(ids, parents) paddle.disable_static() @@ -84,17 +80,9 @@ class TestGatherTreeOpError(unittest.TestCase): def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): - ids = fluid.layers.data( - name='ids', - shape=[5, 2, 2], - dtype='int64', - append_batch_size=False, - ) - parents = fluid.layers.data( - name='parents', - shape=[5, 2, 2], - dtype='int64', - append_batch_size=False, + ids = paddle.static.data(name='ids', shape=[5, 2, 2], dtype='int64') + parents = paddle.static.data( + name='parents', shape=[5, 2, 2], dtype='int64' ) def test_Variable_ids(): @@ -113,11 +101,8 @@ class TestGatherTreeOpError(unittest.TestCase): def test_type_ids(): # dtype must be int32 or int64 - bad_ids = fluid.layers.data( - name='bad_ids', - shape=[5, 2, 2], - dtype='float32', - append_batch_size=False, + bad_ids = paddle.static.data( + name='bad_ids', shape=[5, 2, 2], dtype='float32' ) paddle.nn.functional.gather_tree(bad_ids, parents) @@ -125,33 +110,24 @@ class TestGatherTreeOpError(unittest.TestCase): def test_type_parents(): # dtype must be int32 or int64 - bad_parents = fluid.layers.data( - name='bad_parents', - shape=[5, 2, 2], - dtype='float32', - append_batch_size=False, + bad_parents = paddle.static.data( + name='bad_parents', shape=[5, 2, 2], dtype='float32' ) paddle.nn.functional.gather_tree(ids, bad_parents) self.assertRaises(TypeError, test_type_parents) def test_ids_ndim(): - bad_ids = fluid.layers.data( - name='bad_test_ids', - shape=[5, 2], - dtype='int64', - append_batch_size=False, + bad_ids = paddle.static.data( + name='bad_test_ids', shape=[5, 2], dtype='int64' ) paddle.nn.functional.gather_tree(bad_ids, parents) self.assertRaises(ValueError, test_ids_ndim) def test_parents_ndim(): - bad_parents = fluid.layers.data( - 
name='bad_test_parents', - shape=[5, 2], - dtype='int64', - append_batch_size=False, + bad_parents = paddle.static.data( + name='bad_test_parents', shape=[5, 2], dtype='int64' ) paddle.nn.functional.gather_tree(ids, bad_parents) diff --git a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py index 6d7b9914257620e20e001e277190a1f2f9ed97e3..fcbc91edee31eec09cb8cd57b94a19a7d39d7030 100644 --- a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py +++ b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py @@ -43,10 +43,12 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): with fluid.unique_name.guard(): with fluid.program_guard(main_prog, startup_prog): - image = fluid.layers.data( - name='image', shape=[784], dtype='float32' + image = paddle.static.data( + name='image', shape=[-1, 784], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') py_reader = fluid.io.DataLoader.from_generator( feed_list=[image, label], capacity=4, diff --git a/python/paddle/fluid/tests/unittests/test_group_norm_op.py b/python/paddle/fluid/tests/unittests/test_group_norm_op.py index 52a233658a64e7e43eae5e1ab12160259dd5984e..01cd8108a3c8e001dde5ee6b9e5bd25c6906c313 100644 --- a/python/paddle/fluid/tests/unittests/test_group_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_group_norm_op.py @@ -52,8 +52,8 @@ class TestGroupNormOpError(unittest.TestCase): self.assertRaises(TypeError, test_x_type) def test_x_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[2, 100, 3, 5], dtype='int32' + x2 = paddle.static.data( + name='x2', shape=[-1, 2, 100, 3, 5], dtype='int32' ) groups = 2 paddle.static.nn.group_norm(x2, groups) diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py index 6c5bc338a565b87a47bac3184ca97f4fed4483d6..75e5d1ee2ee158528bcc120b347614c0a3d3dc59 100644 --- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py @@ -286,14 +286,14 @@ class TestHSigmoidOpSparse(OpTest): class TestHSigmoidOpWithSparseGrad(unittest.TestCase): def hs_net_conf(self, is_sparse): - input_word = fluid.layers.data(name="x", shape=[1], dtype='int64') - path_table = fluid.layers.data( - name='path_table', shape=[3], dtype='int64' + input_word = paddle.static.data(name="x", shape=[-1, 1], dtype='int64') + path_table = paddle.static.data( + name='path_table', shape=[-1, 3], dtype='int64' ) - path_code = fluid.layers.data( - name='path_code', shape=[3], dtype='int64' + path_code = paddle.static.data( + name='path_code', shape=[-1, 3], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') data_list = [input_word, path_table, path_code, label] diff --git a/python/paddle/fluid/tests/unittests/test_identity_loss_op.py b/python/paddle/fluid/tests/unittests/test_identity_loss_op.py index 1a0ff98b176e55c214792f22ae679ee28b030335..d9b8ee8fad32812adf92194fec493fcddba361cc 100644 --- a/python/paddle/fluid/tests/unittests/test_identity_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_identity_loss_op.py @@ -103,7 +103,7 @@ class TestIdentityLossOpError(unittest.TestCase): self.assertRaises(Exception, test_string) def test_dtype(): - x2 = 
fluid.layers.data(name='x2', shape=[1], dtype='int32') + x2 = paddle.static.data(name='x2', shape=[-1, 1], dtype='int32') paddle.incubate.identity_loss(x=x2, reduction=1) self.assertRaises(TypeError, test_dtype) diff --git a/python/paddle/fluid/tests/unittests/test_image_classification_layer.py b/python/paddle/fluid/tests/unittests/test_image_classification_layer.py index c485a5182843447d051cf4406304eb91849e3b3e..7ffa8f5e53cff97484c360678fe36e55501d1f0b 100644 --- a/python/paddle/fluid/tests/unittests/test_image_classification_layer.py +++ b/python/paddle/fluid/tests/unittests/test_image_classification_layer.py @@ -39,8 +39,8 @@ class TestLayer(unittest.TestCase): main_program = Program() startup_program = Program() with fluid.program_guard(main_program, startup_program): - images = fluid.layers.data( - name='pixel', shape=[3, 48, 48], dtype='float32' + images = paddle.static.data( + name='pixel', shape=[-1, 3, 48, 48], dtype='float32' ) hidden1 = paddle.static.nn.batch_norm(input=images) hidden2 = paddle.static.nn.fc( @@ -54,8 +54,8 @@ class TestLayer(unittest.TestCase): main_program = Program() startup_program = Program() with fluid.program_guard(main_program, startup_program): - images = fluid.layers.data( - name='pixel', shape=[3, 48, 48], dtype='float32' + images = paddle.static.data( + name='pixel', shape=[-1, 3, 48, 48], dtype='float32' ) paddle.nn.functional.dropout(x=images, p=0.5) @@ -66,8 +66,8 @@ class TestLayer(unittest.TestCase): startup_program = Program() with fluid.program_guard(main_program, startup_program): - images = fluid.layers.data( - name='pixel', shape=[3, 48, 48], dtype='float32' + images = paddle.static.data( + name='pixel', shape=[-1, 3, 48, 48], dtype='float32' ) conv1 = conv_block(images, 64, 2, [0.3, 0]) conv_block(conv1, 256, 3, [0.4, 0.4, 0]) @@ -78,11 +78,11 @@ class TestLayer(unittest.TestCase): main_program = Program() startup_program = Program() with fluid.program_guard(main_program, startup_program): - image1 = fluid.layers.data( - name='pixel1', shape=[3, 48, 48], dtype='float32' + image1 = paddle.static.data( + name='pixel1', shape=[-1, 3, 48, 48], dtype='float32' ) - image2 = fluid.layers.data( - name='pixel2', shape=[3, 48, 48], dtype='float32' + image2 = paddle.static.data( + name='pixel2', shape=[-1, 3, 48, 48], dtype='float32' ) paddle.nn.functional.relu(paddle.add(x=image1, y=image2)) print(main_program) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index 21f95491fa69e9ae8863d1bbed62a4783b3a5639..f34c8d6a2a85832a6c3490ebfe05aa9f63f16189 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -264,9 +264,9 @@ class TestDygraphDeepCF(unittest.TestCase): scope = fluid.core.Scope() with new_program_scope(main=main, startup=startup, scope=scope): - users = fluid.layers.data('users', [1], dtype='int32') - items = fluid.layers.data('items', [1], dtype='int32') - labels = fluid.layers.data('labels', [1], dtype='float32') + users = paddle.static.data('users', [-1, 1], dtype='int32') + items = paddle.static.data('items', [-1, 1], dtype='int32') + labels = paddle.static.data('labels', [-1, 1], dtype='float32') deepcf = DeepCF(num_users, num_items, matrix) prediction = deepcf(users, items) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_framework.py b/python/paddle/fluid/tests/unittests/test_imperative_framework.py index 
c0676fcc8524fd579c0647f2399dace335ece061..a14a508c6888e61176dc3260438e0a3fff60a161 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_framework.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_framework.py @@ -56,9 +56,7 @@ class TestDygraphFramework(unittest.TestCase): def test_dygraph_backward(self): with new_program_scope(): mlp = MLP(input_size=2) - var_inp = fluid.layers.data( - "input", shape=[2, 2], dtype="float32", append_batch_size=False - ) + var_inp = paddle.static.data("input", shape=[2, 2], dtype="float32") out = mlp(var_inp) try: out.backward() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py index 0ab0703b16a09e2bb22238d7a30659ce720c2f1a..34806a8305a7246c457ac034f6a6af065727c6b3 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py @@ -70,12 +70,8 @@ class TestDygraphGAN(unittest.TestCase): discriminator = Discriminator() generator = Generator() - img = fluid.layers.data( - name="img", shape=[2, 1], append_batch_size=False - ) - noise = fluid.layers.data( - name="noise", shape=[2, 2], append_batch_size=False - ) + img = paddle.static.data(name="img", shape=[2, 1]) + noise = paddle.static.data(name="noise", shape=[2, 2]) d_real = discriminator(img) d_loss_real = paddle.mean( @@ -106,9 +102,7 @@ class TestDygraphGAN(unittest.TestCase): discriminator = Discriminator() generator = Generator() - noise = fluid.layers.data( - name="noise", shape=[2, 2], append_batch_size=False - ) + noise = paddle.static.data(name="noise", shape=[2, 2]) d_fake = discriminator(generator(noise)) g_loss = paddle.mean( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py index 54dffdf72362182b81015f3e2a9b9b1927bf4d5b..25eafb2daaf566c25800771dfcb6bf32bee5e4a0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py @@ -71,24 +71,15 @@ class TestDygraphGNN(unittest.TestCase): scope = fluid.core.Scope() with new_program_scope(main=main, startup=startup, scope=scope): - features = fluid.layers.data( - name='features', - shape=[1, 100, 50], - dtype='float32', - append_batch_size=False, + features = paddle.static.data( + name='features', shape=[1, 100, 50], dtype='float32' ) # Use selected rows when it's supported. 
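
The framework, GAN, and GNN tests in this stretch show the other half of the migration rule: when the old call already passed append_batch_size=False, its shape was complete, so the replacement keeps the shape unchanged instead of prepending -1. A small sketch of both variants side by side (tensor names are illustrative):

import paddle

paddle.enable_static()

# Old call relied on the implicit batch dimension -> new call prepends -1:
#   fluid.layers.data(name='label', shape=[1], dtype='int64')
label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')

# Old call passed append_batch_size=False with a full shape -> shape kept as-is:
#   fluid.layers.data(name='img', shape=[2, 1], append_batch_size=False)
img = paddle.static.data(name='img', shape=[2, 1], dtype='float32')
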
- adj = fluid.layers.data( - name='adj', - shape=[1, 100, 100], - dtype='float32', - append_batch_size=False, + adj = paddle.static.data( + name='adj', shape=[1, 100, 100], dtype='float32' ) - labels = fluid.layers.data( - name='labels', - shape=[100, 1], - dtype='int64', - append_batch_size=False, + labels = paddle.static.data( + name='labels', shape=[100, 1], dtype='int64' ) model = GCN('test_gcn', 50) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index 0a775f75e71dac18cb4af16b6960227e0c8bd59c..0eb037bc6a02e40d8f2734dfd0b051ad996d4204 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -163,11 +163,12 @@ class TestDygraphSimpleNet(unittest.TestCase): exe = fluid.Executor(place) sgd = SGDOptimizer(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype=dtype) - + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype=dtype) + y.desc.set_need_check_feed(False) static_loss = simple_net(x, y) sgd.minimize(static_loss) static_param_updated = dict() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py index 0e813147ad95504157e1fd8b40d82febd240b6cc..9c187d73b81e48b83cfd60e9a31e853d12d43ab3 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py @@ -195,10 +195,12 @@ class TestImperativeMnist(unittest.TestCase): drop_last=True, ) - img = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype='float32' + img = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') cost = mnist(img) loss = paddle.nn.functional.cross_entropy( cost, label, reduction='none', use_softmax=False diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py index 64594acb285b027eaef1161c00abbe7ebccd4a45..3acf62766165ffc471d865f559eddabd63800d75 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist_sorted_gradient.py @@ -98,10 +98,12 @@ class TestImperativeMnistSortGradient(unittest.TestCase): paddle.dataset.mnist.train(), batch_size=128, drop_last=True ) - img = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype='float32' + img = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') cost = mnist(img) loss = paddle.nn.functional.cross_entropy( cost, label, reduction='none', use_softmax=False diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 6217ef0b1b5bfdaf06f76154878c16349dc6489f..12be3af2d9cf9164a7a865e4e4de79fd33299a22 100644 --- 
a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -537,15 +537,19 @@ class TestDygraphOCRAttention(unittest.TestCase): optimizer = fluid.optimizer.SGD(learning_rate=0.001) - images = fluid.layers.data( - name='pixel', shape=Config.DATA_SHAPE, dtype='float32' + images = paddle.static.data( + name='pixel', shape=[-1] + Config.DATA_SHAPE, dtype='float32' ) - static_label_in = fluid.layers.data( - name='label_in', shape=[1], dtype='int64', lod_level=0 + images.desc.set_need_check_feed(False) + static_label_in = paddle.static.data( + name='label_in', shape=[-1, 1], dtype='int64', lod_level=0 ) - static_label_out = fluid.layers.data( - name='label_out', shape=[1], dtype='int64', lod_level=0 + static_label_in.desc.set_need_check_feed(False) + static_label_out = paddle.static.data( + name='label_out', shape=[-1, 1], dtype='int64', lod_level=0 ) + static_label_out.desc.set_need_check_feed(False) + static_label_out.stop_gradient = True static_label_out.trainable = False diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index ab1490588088908abdc4938fce9a37577fdd3d26..31f066a1f186518d9f029e6423a428b4bf08f1ff 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -173,10 +173,12 @@ class TestImperativeOptimizerBase(unittest.TestCase): paddle.dataset.mnist.train(), batch_size=128, drop_last=True ) - img = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype='float32' + img = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') img = paddle.reshape(img, shape=[batch_size, 784]) cost = mlp(img) avg_loss = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py index 2246cc25a22285ce87162613f24c0c96fc0d49aa..d038e89e1fb532fda882865f3e042d466c9ef855 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py @@ -182,10 +182,12 @@ class TestImperativeOptimizerBase(unittest.TestCase): paddle.dataset.mnist.train(), batch_size=128, drop_last=True ) - img = fluid.layers.data( - name='pixel', shape=[1, 28, 28], dtype='float32' + img = paddle.static.data( + name='pixel', shape=[-1, 1, 28, 28], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') img = paddle.reshape(img, shape=[batch_size, 784]) cost = mlp(img) avg_loss = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index b24b24ebc6255527a4b6dd575f86fe044623f64e..6eb0c9d6e6c0345d9cedd8273c5e4b2bbaeacee6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -328,16 +328,20 @@ class TestDygraphPtbRnn(unittest.TestCase): else fluid.CUDAPlace(0) ) sgd = SGDOptimizer(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], 
dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell = paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py index 01b1d18070e1480ae22f493ecad9ac0afa7373ff..a586c98de1e075e134e4c5ce41821fde5fabf463 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py @@ -116,16 +116,20 @@ class TestDygraphPtbRnnSortGradient(unittest.TestCase): else fluid.CUDAPlace(0) ) sgd = SGDOptimizer(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps, 1], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell = paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell diff --git a/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py b/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py index e40cb632065fa58e66a9c40ee95921e4901b5d2b..2d57340b954eeeca3628dc2868715e2f677466c6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_recurrent_usage.py @@ -80,12 +80,8 @@ class TestRecurrentFeed(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - in1 = fluid.layers.data( - name="inp1", shape=[2, 2], append_batch_size=False - ) - in2 = fluid.layers.data( - name="inp2", shape=[2, 2], append_batch_size=False - ) + in1 = paddle.static.data(name="inp1", shape=[2, 2]) + in2 = paddle.static.data(name="inp2", shape=[2, 2]) rt1 = RecurrentTest("RecurrentTest") static_sum_out, static_out = rt1(in1, in2) fluid.backward.append_backward(static_sum_out) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py index 1c548f5caea48863c31a511c90b88f4806e5954b..88440c0cf22d163e31f63d385ad6ec2bf75a9018 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py +++ 
b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py @@ -126,14 +126,14 @@ class TestImperativeMnist(unittest.TestCase): st_sgd = SGDOptimizer(learning_rate=1e-3) - st_state = fluid.layers.data( - name='st_state', shape=[4], dtype='float32' + st_state = paddle.static.data( + name='st_state', shape=[-1, 4], dtype='float32' ) - st_reward = fluid.layers.data( - name='st_reward', shape=[1], dtype='float32' + st_reward = paddle.static.data( + name='st_reward', shape=[-1, 1], dtype='float32' ) - st_mask = fluid.layers.data( - name='st_mask', shape=[2], dtype='float32' + st_mask = paddle.static.data( + name='st_mask', shape=[-1, 2], dtype='float32' ) st_loss_probs = policy(st_state) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index b20ceb091b017f9e55d8bf6aace2086831607d61..bc46ad12d3df0d7c78f1f7085f557d58ca4faa6f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -361,10 +361,12 @@ class TestDygraphResnet(unittest.TestCase): batch_size=batch_size, ) - img = fluid.layers.data( - name='pixel', shape=[3, 224, 224], dtype='float32' + img = paddle.static.data( + name='pixel', shape=[-1, 3, 224, 224], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = resnet(img) loss = paddle.nn.functional.cross_entropy( input=out, label=label, reduction='none', use_softmax=False diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py index 798890a4898e8aef61faaa0ba245f525d0d2f471..9d6d2ebabd12ee61721186b8cfef0ea763426285 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet_sorted_gradient.py @@ -170,10 +170,12 @@ class TestDygraphResnetSortGradient(unittest.TestCase): batch_size=batch_size, ) - img = fluid.layers.data( - name='pixel', shape=[3, 224, 224], dtype='float32' + img = paddle.static.data( + name='pixel', shape=[-1, 3, 224, 224], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = resnet(img) loss = paddle.nn.functional.cross_entropy( input=out, label=label, reduction='none', use_softmax=False diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index b02e1595dbe1ca150fdcedc80c2424a06c60b72c..6c95805602f64a525c938c0e55d39acc6fefadcf 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -444,10 +444,12 @@ class TestImperativeResneXt(unittest.TestCase): drop_last=True, ) - img = fluid.layers.data( - name='pixel', shape=[3, 224, 224], dtype='float32' + img = paddle.static.data( + name='pixel', shape=[-1, 3, 224, 224], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = se_resnext(img) softmax_out = paddle.nn.function.softmax(out) loss = paddle.nn.functional.cross_entropy( diff --git 
a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index cd707bb0ca6420139e500bd21d794e6802cd1bbf..647710fba61f1f4a0eef33840431cfa58e9b5544 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -173,11 +173,12 @@ class TestDygraphSimpleNet(unittest.TestCase): exe = fluid.Executor(place) sgd = SGDOptimizer(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype=dtype) - + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype=dtype) + y.desc.set_need_check_feed(False) static_loss = simple_net(x, y) sgd.minimize(static_loss) static_param_updated = dict() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index 8e213c2d2a2b865fe2384c243a4888fb7fa68785..e171899289aa4be06df5dd9c02d4a1fd4c69f169 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -221,14 +221,13 @@ def make_all_inputs(input_fields): """ inputs = [] for input_field in input_fields: - input_var = fluid.layers.data( + input_var = paddle.static.data( name=input_field, shape=input_descs[input_field][0], dtype=input_descs[input_field][1], lod_level=input_descs[input_field][2] if len(input_descs[input_field]) == 3 else 0, - append_batch_size=False, ) inputs.append(input_var) return inputs diff --git a/python/paddle/fluid/tests/unittests/test_index_select_op.py b/python/paddle/fluid/tests/unittests/test_index_select_op.py index 39895f2691ca3f774b60323734d32d3ec4c88491..2234280939cecf37512f6cc927f32101f723222f 100644 --- a/python/paddle/fluid/tests/unittests/test_index_select_op.py +++ b/python/paddle/fluid/tests/unittests/test_index_select_op.py @@ -97,10 +97,8 @@ class TestIndexSelectAPI(unittest.TestCase): # case 1: with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 4]) - index = fluid.layers.data( - name='index', shape=[3], dtype='int32', append_batch_size=False - ) + x = paddle.static.data(name='x', shape=[-1, 4]) + index = paddle.static.data(name='index', shape=[3], dtype='int32') z = paddle.index_select(x, index, axis=1) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( @@ -115,10 +113,8 @@ class TestIndexSelectAPI(unittest.TestCase): # case 2: with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 4]) - index = fluid.layers.data( - name='index', shape=[3], dtype='int32', append_batch_size=False - ) + x = paddle.static.data(name='x', shape=[-1, 4]) + index = paddle.static.data(name='index', shape=[3], dtype='int32') z = paddle.index_select(x, index) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( diff --git a/python/paddle/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/fluid/tests/unittests/test_inference_model_io.py index f6b3ba6b69539ca1d36606b4ec2fd38f48b1a938..fd8523bfc15ea5681ec44e0fff95859858e0b23f 100644 --- a/python/paddle/fluid/tests/unittests/test_inference_model_io.py +++ b/python/paddle/fluid/tests/unittests/test_inference_model_io.py @@ -24,7 +24,6 @@ import 
paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.executor as executor -import paddle.fluid.layers as layers import paddle.fluid.optimizer as optimizer from paddle.distributed.io import load_inference_model_distributed from paddle.fluid.compiler import CompiledProgram @@ -55,8 +54,8 @@ class TestBook(unittest.TestCase): program = Program() with program_guard(program, init_program): - x = layers.data(name='x', shape=[2], dtype='float32') - y = layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x=x, size=1, activation=None) @@ -163,8 +162,8 @@ class TestSaveInferenceModel(unittest.TestCase): # fake program without feed/fetch with program_guard(program, init_program): - x = layers.data(name='x', shape=[2], dtype='float32') - y = layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) @@ -188,8 +187,8 @@ class TestSaveInferenceModel(unittest.TestCase): # fake program without feed/fetch with program_guard(program, init_program): - x = layers.data(name='x', shape=[2], dtype='float32') - y = layers.data(name='y', shape=[1], dtype='int32') + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='int32') predict = paddle.static.nn.fc(x, size=2, activation='softmax') acc = paddle.static.accuracy(input=predict, label=y) auc_var, batch_auc_var, auc_states = paddle.static.auc( @@ -223,8 +222,8 @@ class TestInstance(unittest.TestCase): # fake program without feed/fetch with program_guard(program, init_program): - x = layers.data(name='x', shape=[2], dtype='float32') - y = layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) @@ -261,8 +260,8 @@ class TestSaveInferenceModelNew(unittest.TestCase): # fake program without feed/fetch with program_guard(program, init_program): - x = layers.data(name='x', shape=[2], dtype='float32') - y = layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) @@ -440,8 +439,8 @@ class TestSaveInferenceModelNew(unittest.TestCase): # fake program without feed/fetch with program_guard(program, init_program): - x = layers.data(name='x', shape=[2], dtype='float32') - y = layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) @@ -489,8 +488,8 @@ class TestSaveInferenceModelNew(unittest.TestCase): # fake program without feed/fetch with program_guard(program, init_program): - x = layers.data(name='x', shape=[2], dtype='float32') - y = layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) 
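
Several hunks above and below (test_fleet_executor, test_gather_nd_op, the ptb_rnn and selected-rows imperative tests, test_inplace_abn_op) pair the new declaration with var.desc.set_need_check_feed(False). Unlike fluid.layers.data, paddle.static.data declares the variable with the feed check enabled, so the executor validates each fed array against the declared shape and dtype; the tests switch the check off where their existing feeds do not match the declaration exactly. A minimal sketch of the pattern, assuming an otherwise empty program:

import numpy as np
import paddle

paddle.enable_static()

x = paddle.static.data(name='x', shape=[-1, 2], dtype='float64')
# Opt out of the executor-side feed check, matching the tests above.
x.desc.set_need_check_feed(False)

out = paddle.sum(x, axis=1)
exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(paddle.static.default_startup_program())
(res,) = exe.run(
    feed={'x': np.array([[1.0, 2.0], [3.0, 4.0]])},
    fetch_list=[out],
)
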
diff --git a/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py b/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py index 299d3218cfdba26ae9094f1441ee7be55bc4d403..a4cc038bd7a52840814f44365f9db5b3b811e4b7 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py @@ -48,13 +48,13 @@ class TestInplaceANBOpTraining(unittest.TestCase): startup.random_seed = seed with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - data = fluid.layers.data( + data = paddle.static.data( name='input', shape=self.dshape, dtype=self.dtype, - append_batch_size=False, - stop_gradient=False, ) + data.stop_gradient = False + data.desc.set_need_check_feed(False) bn = paddle.static.nn.batch_norm( data, diff --git a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py index 16477e086daf11a39cbb9e6b4053a65ebb1402ca..b614709ec996673b62a359ca52f69841486ec314 100644 --- a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py +++ b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py @@ -37,18 +37,18 @@ class TestSoftmaxWithXe(unittest.TestCase): m, n = x.shape with fluid.program_guard(fluid.Program(), fluid.Program()): with fluid.scope_guard(fluid.Scope()): - x_d = fluid.layers.data( + x_d = paddle.static.data( name='x', shape=[m, n], dtype=self.dtype, - append_batch_size=False, ) - y_d = fluid.layers.data( + x_d.desc.set_need_check_feed(False) + y_d = paddle.static.data( name='y', shape=[m, 1] if not self.soft_label else [m, n], dtype='int64' if not self.soft_label else self.dtype, - append_batch_size=False, ) + y_d.desc.set_need_check_feed(False) z_d, s_d = paddle.nn.functional.softmax_with_cross_entropy( x_d, y_d, diff --git a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py index aac98fec5c43553e035850b6bd57a72fc548b102..48932e5581b24aec2017042df5d4b6d17e6fb2a9 100644 --- a/python/paddle/fluid/tests/unittests/test_instance_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_instance_norm_op.py @@ -243,7 +243,9 @@ class TestInstanceNormOpError(unittest.TestCase): self.assertRaises(TypeError, paddle.static.nn.instance_norm, x1) # the input dtype of instance_norm must be float32 or float64 - x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") + x2 = paddle.static.data( + name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" + ) self.assertRaises(TypeError, paddle.static.nn.instance_norm, x2) @@ -251,9 +253,7 @@ class TestInstanceNormOpErrorCase1(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): # the first dimension of input for instance_norm must between [2d, 5d] - x = fluid.layers.data( - name='x', shape=[3], dtype="float32", append_batch_size=False - ) + x = paddle.static.data(name='x', shape=[3], dtype="float32") self.assertRaises(ValueError, paddle.static.nn.instance_norm, x) diff --git a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py index 48156b7448218ab67ebfe69191cfa5b02dfa0b84..bc4ef3d386ccb2f0b0c94890b92382867bebe90f 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py +++ b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py @@ -24,8 +24,8 @@ import paddle.fluid.core as core def fc_with_batchnorm(use_feed): - img = 
fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data(name='image', shape=[-1, 784], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') hidden = img for _ in range(3): diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py index 9e51118e96c4c9fda4187b1046a9d90f943d4b40..b64090996b3a2fd3de1ccf2bd135fb9b2ac4ea6f 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_pass.py @@ -18,13 +18,12 @@ import numpy as np from parallel_executor_test_base import DeviceType, TestParallelExecutorBase import paddle -import paddle.fluid as fluid import paddle.fluid.core as core def _feed_data_helper(): - img = fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data(name='image', shape=[-1, 784], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') return img, label diff --git a/python/paddle/fluid/tests/unittests/test_lambv2_op.py b/python/paddle/fluid/tests/unittests/test_lambv2_op.py index f8f65f5a14a4787bf3522319af80a144739551fc..f7aa3f6fbd92af62499631cc6c0d2fa5611bb37e 100644 --- a/python/paddle/fluid/tests/unittests/test_lambv2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lambv2_op.py @@ -123,8 +123,10 @@ class TestLambOpWithCombinedOp(unittest.TestCase): with fluid.program_guard(main, startup): main.random_seed = seed startup.random_seed = seed - x = fluid.layers.data(name='X', shape=[13], dtype='float32') - y = fluid.layers.data(name='Y', shape=[1], dtype='float32') + x = paddle.static.data( + name='X', shape=[-1, 13], dtype='float32' + ) + y = paddle.static.data(name='Y', shape=[-1, 1], dtype='float32') prediction = paddle.static.nn.fc(x, size=1, activation=None) loss = paddle.nn.functional.square_error_cost( input=prediction, label=y diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py index 0240b411b047897514e18fc65119464619075ad2..0878f468074500fb23819b36b8667562097356d2 100644 --- a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py @@ -333,12 +333,7 @@ class TestLayerNormOp(unittest.TestCase): class TestLayerNormAPI(unittest.TestCase): def test_case(self): - x = fluid.layers.data( - name='x', - shape=[64, 32, 256], - dtype='float32', - append_batch_size=False, - ) + x = paddle.static.data(name='x', shape=[64, 32, 256], dtype='float32') x = paddle.static.nn.layer_norm( x, scale=True, @@ -380,7 +375,9 @@ class TestDygraphLayerNormAPIError(unittest.TestCase): # the input dtype of LayerNorm must be float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data(name='x2', shape=[3, 32, 32], dtype="int32") + x2 = paddle.static.data( + name='x2', shape=[-1, 3, 32, 32], dtype="int32" + ) self.assertRaises(TypeError, layer_norm, x2) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index e33cde31799adc376fe4160e8bfbd027c46b8811..51715e2ae1ce28ecaf6fb161332bbf5144891631 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -108,11 
+108,10 @@ class TestLayer(LayerTest): def test_dropout(self): inp = np.ones([3, 32, 32], dtype='float32') with self.static_graph(): - t = layers.data( + t = paddle.static.data( name='data', shape=[3, 32, 32], dtype='float32', - append_batch_size=False, ) dropout = paddle.nn.Dropout(p=0.35) ret = dropout(t) @@ -135,11 +134,8 @@ class TestLayer(LayerTest): def test_linear(self): inp = np.ones([3, 32, 32], dtype='float32') with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False, + t = paddle.static.data( + name='data', shape=[3, 32, 32], dtype='float32' ) linear = paddle.nn.Linear( 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1) @@ -228,11 +224,8 @@ class TestLayer(LayerTest): def test_Flatten(self): inp = np.ones([3, 4, 4, 5], dtype='float32') with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 4, 4, 5], - dtype='float32', - append_batch_size=False, + t = paddle.static.data( + name='data', shape=[3, 4, 4, 5], dtype='float32' ) flatten = paddle.nn.Flatten() ret = flatten(t) @@ -277,7 +270,9 @@ class TestLayer(LayerTest): def test_SyncBatchNorm(self): if core.is_compiled_with_cuda(): with self.static_graph(): - t = layers.data(name='t', shape=[-1, 3, 5, 5], dtype='float32') + t = paddle.static.data( + name='t', shape=[-1, 3, 5, 5], dtype='float32' + ) my_sync_bn = paddle.nn.SyncBatchNorm(3) ret = my_sync_bn(t) static_ret = self.get_static_graph_result( @@ -294,7 +289,7 @@ class TestLayer(LayerTest): def test_relu(self): with self.static_graph(): - t = layers.data(name='t', shape=[3, 3], dtype='float32') + t = paddle.static.data(name='t', shape=[-1, 3, 3], dtype='float32') ret = F.relu(t) static_ret = self.get_static_graph_result( feed={'t': np.ones([3, 3], dtype='float32')}, fetch_list=[ret] @@ -309,8 +304,10 @@ class TestLayer(LayerTest): def test_matmul(self): with self.static_graph(): - t = layers.data(name='t', shape=[3, 3], dtype='float32') - t2 = layers.data(name='t2', shape=[3, 3], dtype='float32') + t = paddle.static.data(name='t', shape=[-1, 3, 3], dtype='float32') + t2 = paddle.static.data( + name='t2', shape=[-1, 3, 3], dtype='float32' + ) ret = paddle.matmul(t, t2) static_ret = self.get_static_graph_result( feed={ @@ -337,12 +334,22 @@ class TestLayer(LayerTest): n6 = np.ones([3, 3], dtype='float32') * 5 with self.static_graph(): - t = layers.data(name='t', shape=[3, 3], dtype='float32') - t2 = layers.data(name='t2', shape=[3, 3], dtype='float32') - t3 = layers.data(name='t3', shape=[3, 3], dtype='float32') - t4 = layers.data(name='t4', shape=[3, 3], dtype='float32') - t5 = layers.data(name='t5', shape=[3, 3], dtype='float32') - t6 = layers.data(name='t6', shape=[3, 3], dtype='float32') + t = paddle.static.data(name='t', shape=[-1, 3, 3], dtype='float32') + t2 = paddle.static.data( + name='t2', shape=[-1, 3, 3], dtype='float32' + ) + t3 = paddle.static.data( + name='t3', shape=[-1, 3, 3], dtype='float32' + ) + t4 = paddle.static.data( + name='t4', shape=[-1, 3, 3], dtype='float32' + ) + t5 = paddle.static.data( + name='t5', shape=[-1, 3, 3], dtype='float32' + ) + t6 = paddle.static.data( + name='t6', shape=[-1, 3, 3], dtype='float32' + ) ret = paddle.add(t, t2) ret = paddle.pow(ret, t3) @@ -381,7 +388,9 @@ class TestLayer(LayerTest): def test_conv2d_transpose(self): inp_np = np.arange(0, 24).reshape([2, 3, 2, 2]).astype('float32') with self.static_graph(): - img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32') + img = paddle.static.data( + name='pixel', shape=[-1, 
3, 2, 2], dtype='float32' + ) out = paddle.static.nn.conv2d_transpose( input=img, num_filters=10, @@ -393,7 +402,9 @@ class TestLayer(LayerTest): feed={'pixel': inp_np}, fetch_list=[out] )[0] with self.static_graph(): - img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32') + img = paddle.static.data( + name='pixel', shape=[-1, 3, 2, 2], dtype='float32' + ) conv2d_transpose = paddle.nn.Conv2DTranspose( 3, 10, @@ -473,8 +484,8 @@ class TestLayer(LayerTest): # the input dtype of Conv2DTranspose must be float16 or float32 or float64 # float16 only can be set on GPU place def test_type(): - images = layers.data( - name='pixel', shape=[3, 5, 5], dtype='int32' + images = paddle.static.data( + name='pixel', shape=[-1, 3, 5, 5], dtype='int32' ) conv2d = paddle.nn.Conv2DTranspose(3, 3, [2, 2]) conv2d_ret2 = conv2d(images) @@ -486,12 +497,8 @@ class TestLayer(LayerTest): inp_np_y = np.array([[4, 5, 6]]).astype('float32') with self.static_graph(): - data_x = layers.data( - name='x', shape=[1, 3], dtype="float32", append_batch_size=False - ) - data_y = layers.data( - name='y', shape=[1, 3], dtype="float32", append_batch_size=False - ) + data_x = paddle.static.data(name='x', shape=[1, 3], dtype="float32") + data_y = paddle.static.data(name='y', shape=[1, 3], dtype="float32") out = paddle.static.nn.common.bilinear_tensor_product( data_x, data_y, @@ -505,12 +512,8 @@ class TestLayer(LayerTest): )[0] with self.static_graph(): - data_x = layers.data( - name='x', shape=[1, 3], dtype="float32", append_batch_size=False - ) - data_y = layers.data( - name='y', shape=[1, 3], dtype="float32", append_batch_size=False - ) + data_x = paddle.static.data(name='x', shape=[1, 3], dtype="float32") + data_y = paddle.static.data(name='y', shape=[1, 3], dtype="float32") btp = paddle.nn.Bilinear( 3, 3, @@ -542,11 +545,11 @@ class TestLayer(LayerTest): dy_rlt2_value = dy_rlt2.numpy() with self.static_graph(): - data_x2 = layers.data( - name='x', shape=[1, 3], dtype="float32", append_batch_size=False + data_x2 = paddle.static.data( + name='x', shape=[1, 3], dtype="float32" ) - data_y2 = layers.data( - name='y', shape=[1, 3], dtype="float32", append_batch_size=False + data_y2 = paddle.static.data( + name='y', shape=[1, 3], dtype="float32" ) out2 = paddle.static.nn.common.bilinear_tensor_product( data_x2, data_y2, 6, act='sigmoid' @@ -599,7 +602,10 @@ class TestLayer(LayerTest): inp_word = np.array([[[1]]]).astype('int64') dict_size = 20 with self.static_graph(): - data_t = layers.data(name='word', shape=[1], dtype='int64') + data_t = paddle.static.data( + name='word', shape=[-1, 1], dtype='int64' + ) + data_t.desc.set_need_check_feed(False) emb = layers.embedding( input=data_t, size=[dict_size, 32], @@ -610,7 +616,10 @@ class TestLayer(LayerTest): feed={'word': inp_word}, fetch_list=[emb] )[0] with self.static_graph(): - data_t = layers.data(name='word', shape=[1], dtype='int64') + data_t = paddle.static.data( + name='word', shape=[-1, 1], dtype='int64' + ) + data_t.desc.set_need_check_feed(False) emb2 = paddle.nn.Embedding( dict_size, 32, weight_attr='emb.w', sparse=False ) @@ -693,8 +702,8 @@ class TestLayer(LayerTest): def test_conv3d(self): with self.static_graph(): - images = layers.data( - name='pixel', shape=[3, 6, 6, 6], dtype='float32' + images = paddle.static.data( + name='pixel', shape=[-1, 3, 6, 6, 6], dtype='float32' ) ret = paddle.static.nn.conv3d( input=images, num_filters=3, filter_size=2 @@ -705,8 +714,8 @@ class TestLayer(LayerTest): )[0] with self.static_graph(): - images = layers.data( - 
name='pixel', shape=[3, 6, 6, 6], dtype='float32' + images = paddle.static.data( + name='pixel', shape=[-1, 3, 6, 6, 6], dtype='float32' ) conv3d = paddle.nn.Conv3D( in_channels=3, out_channels=3, kernel_size=2 @@ -783,12 +792,8 @@ class TestLayer(LayerTest): input = np.random.random(shape).astype('float32') with self.static_graph(): - X = fluid.layers.data( - name='X', - shape=shape, - dtype='float32', - lod_level=1, - append_batch_size=False, + X = paddle.static.data( + name='X', shape=shape, dtype='float32', lod_level=1 ) ret = paddle.static.nn.group_norm( input=X, @@ -807,12 +812,8 @@ class TestLayer(LayerTest): )[0] with self.static_graph(): - X = fluid.layers.data( - name='X', - shape=shape, - dtype='float32', - lod_level=1, - append_batch_size=False, + X = paddle.static.data( + name='X', shape=shape, dtype='float32', lod_level=1 ) groupNorm = paddle.nn.GroupNorm( num_channels=shape[1], @@ -855,18 +856,14 @@ class TestLayer(LayerTest): input = np.random.random(shape).astype('float32') with self.static_graph(): - X = fluid.layers.data( - name='X', shape=shape, dtype='float32', append_batch_size=False - ) + X = paddle.static.data(name='X', shape=shape, dtype='float32') ret = paddle.static.nn.instance_norm(input=X) static_ret = self.get_static_graph_result( feed={'X': input}, fetch_list=[ret] )[0] with self.static_graph(): - X = fluid.layers.data( - name='X', shape=shape, dtype='float32', append_batch_size=False - ) + X = paddle.static.data(name='X', shape=shape, dtype='float32') instanceNorm = paddle.nn.InstanceNorm2D(num_features=shape[1]) ret = instanceNorm(X) static_ret2 = self.get_static_graph_result( @@ -914,12 +911,8 @@ class TestLayer(LayerTest): input = np.random.random(shape).astype('float32') with self.static_graph(): - Weight = fluid.layers.data( - name='Weight', - shape=shape, - dtype='float32', - lod_level=1, - append_batch_size=False, + Weight = paddle.static.data( + name='Weight', shape=shape, dtype='float32', lod_level=1 ) ret = paddle.static.nn.spectral_norm( weight=Weight, dim=1, power_iters=2 @@ -935,12 +928,8 @@ class TestLayer(LayerTest): )[0] with self.static_graph(): - Weight = fluid.layers.data( - name='Weight', - shape=shape, - dtype='float32', - lod_level=1, - append_batch_size=False, + Weight = paddle.static.data( + name='Weight', shape=shape, dtype='float32', lod_level=1 ) spectralNorm = paddle.nn.SpectralNorm(shape, dim=1, power_iters=2) ret = spectralNorm(Weight) @@ -968,7 +957,9 @@ class TestLayer(LayerTest): ) with self.static_graph(): - img = layers.data(name='pixel', shape=[3, 2, 2, 2], dtype='float32') + img = paddle.static.data( + name='pixel', shape=[-1, 3, 2, 2, 2], dtype='float32' + ) out = paddle.static.nn.conv3d_transpose( input=img, num_filters=12, filter_size=12, use_cudnn=True ) @@ -976,7 +967,9 @@ class TestLayer(LayerTest): feed={'pixel': input_array}, fetch_list=[out] )[0] with self.static_graph(): - img = layers.data(name='pixel', shape=[3, 2, 2, 2], dtype='float32') + img = paddle.static.data( + name='pixel', shape=[-1, 3, 2, 2, 2], dtype='float32' + ) conv3d_transpose = paddle.nn.Conv3DTranspose( in_channels=3, out_channels=12, kernel_size=12 ) @@ -1081,8 +1074,8 @@ class TestLayer(LayerTest): value_b = np.arange(3) # less than with self.static_graph(): - a = layers.data(name='a', shape=[1], dtype='int64') - b = layers.data(name='b', shape=[1], dtype='int64') + a = paddle.static.data(name='a', shape=[-1, 1], dtype='int64') + b = paddle.static.data(name='b', shape=[-1, 1], dtype='int64') cond = paddle.less_than(x=a, y=b) static_ret = 
self.get_static_graph_result( feed={"a": value_a, "b": value_b}, fetch_list=[cond] @@ -1097,8 +1090,8 @@ class TestLayer(LayerTest): # less equal with self.static_graph(): - a1 = layers.data(name='a1', shape=[1], dtype='int64') - b1 = layers.data(name='b1', shape=[1], dtype='int64') + a1 = paddle.static.data(name='a1', shape=[-1, 1], dtype='int64') + b1 = paddle.static.data(name='b1', shape=[-1, 1], dtype='int64') cond1 = paddle.less_equal(x=a1, y=b1) static_ret1 = self.get_static_graph_result( feed={"a1": value_a, "b1": value_b}, fetch_list=[cond1] @@ -1113,8 +1106,8 @@ class TestLayer(LayerTest): # greater than with self.static_graph(): - a2 = layers.data(name='a2', shape=[1], dtype='int64') - b2 = layers.data(name='b2', shape=[1], dtype='int64') + a2 = paddle.static.data(name='a2', shape=[-1, 1], dtype='int64') + b2 = paddle.static.data(name='b2', shape=[-1, 1], dtype='int64') cond2 = paddle.greater_than(x=a2, y=b2) static_ret2 = self.get_static_graph_result( feed={"a2": value_a, "b2": value_b}, fetch_list=[cond2] @@ -1129,8 +1122,8 @@ class TestLayer(LayerTest): # greater equal with self.static_graph(): - a3 = layers.data(name='a3', shape=[1], dtype='int64') - b3 = layers.data(name='b3', shape=[1], dtype='int64') + a3 = paddle.static.data(name='a3', shape=[-1, 1], dtype='int64') + b3 = paddle.static.data(name='b3', shape=[-1, 1], dtype='int64') cond3 = paddle.greater_equal(x=a3, y=b3) static_ret3 = self.get_static_graph_result( feed={"a3": value_a, "b3": value_b}, fetch_list=[cond3] @@ -1145,8 +1138,8 @@ class TestLayer(LayerTest): # equal with self.static_graph(): - a4 = layers.data(name='a4', shape=[1], dtype='int64') - b4 = layers.data(name='b4', shape=[1], dtype='int64') + a4 = paddle.static.data(name='a4', shape=[-1, 1], dtype='int64') + b4 = paddle.static.data(name='b4', shape=[-1, 1], dtype='int64') cond4 = paddle.equal(x=a4, y=b4) static_ret4 = self.get_static_graph_result( feed={"a4": value_a, "b4": value_b}, fetch_list=[cond4] @@ -1161,8 +1154,8 @@ class TestLayer(LayerTest): # not equal with self.static_graph(): - a5 = layers.data(name='a5', shape=[1], dtype='int64') - b5 = layers.data(name='b5', shape=[1], dtype='int64') + a5 = paddle.static.data(name='a5', shape=[-1, 1], dtype='int64') + b5 = paddle.static.data(name='b5', shape=[-1, 1], dtype='int64') cond5 = paddle.equal(x=a5, y=b5) static_ret5 = self.get_static_graph_result( feed={"a5": value_a, "b5": value_b}, fetch_list=[cond5] @@ -1349,22 +1342,20 @@ class TestLayer(LayerTest): def test_crop_tensor(self): with self.static_graph(): - x = fluid.layers.data(name="x1", shape=[6, 5, 8]) - - dim1 = fluid.layers.data( - name="dim1", shape=[1], append_batch_size=False - ) - dim2 = fluid.layers.data( - name="dim2", shape=[1], append_batch_size=False + x = paddle.static.data( + name="x1", shape=[-1, 6, 5, 8], dtype="float32" ) + + dim1 = paddle.static.data(name="dim1", shape=[1], dtype="float32") + dim2 = paddle.static.data(name="dim2", shape=[1], dtype="float32") crop_shape1 = (1, 2, 4, 4) - crop_shape2 = fluid.layers.data( - name="crop_shape", shape=[4], append_batch_size=False + crop_shape2 = paddle.static.data( + name="crop_shape", shape=[4], dtype="float32" ) crop_shape3 = [-1, dim1, dim2, 4] crop_offsets1 = [0, 0, 1, 0] - crop_offsets2 = fluid.layers.data( - name="crop_offset", shape=[4], append_batch_size=False + crop_offsets2 = paddle.static.data( + name="crop_offset", shape=[4], dtype="float32" ) crop_offsets3 = [0, dim1, dim2, 0] @@ -1378,7 +1369,9 @@ class TestLayer(LayerTest): def test_shard_index(self): with 
self.static_graph(): - x = fluid.layers.data(name="label", shape=[4, 1], dtype='int64') + x = paddle.static.data( + name="label", shape=[-1, 4, 1], dtype='int64' + ) shard_label = paddle.shard_index( input=x, index_num=20, nshards=2, shard_id=0 ) @@ -1517,12 +1510,15 @@ class TestBook(LayerTest): self._feed_dict[name] = self._get_np_data( shape, dtype, append_batch_size ) - return layers.data( + if append_batch_size: + shape = [-1] + shape + data = paddle.static.data( name=name, shape=shape, dtype=dtype, - append_batch_size=append_batch_size, ) + data.desc.set_need_check_feed(False) + return data def make_fit_a_line(self): with program_guard( @@ -1803,8 +1799,8 @@ class TestBook(LayerTest): updates = self._get_data( name='updates', shape=[2, 3], - append_batch_size=False, dtype='float32', + append_batch_size=False, ) out = paddle.scatter(x, index=idx, updates=updates) return out @@ -2065,11 +2061,17 @@ class TestBook(LayerTest): def test_affine_grid(self): with self.static_graph(): - data = layers.data(name='data', shape=[2, 3, 3], dtype="float32") + data = paddle.static.data( + name='data', shape=[-1, 2, 3, 3], dtype="float32" + ) out = paddle.argsort(x=data, axis=1) - theta = layers.data(name="theta", shape=[2, 3], dtype="float32") - out_shape = layers.data(name="out_shape", shape=[-1], dtype="int32") + theta = paddle.static.data( + name="theta", shape=[-1, 2, 3], dtype="float32" + ) + out_shape = paddle.static.data( + name="out_shape", shape=[-1], dtype="int32" + ) data_0 = paddle.nn.functional.affine_grid(theta, out_shape) data_1 = paddle.nn.functional.affine_grid(theta, [5, 3, 28, 28]) @@ -2082,7 +2084,9 @@ class TestBook(LayerTest): ends = [3, 3, 4] strides = [1, 1, 1] with self.static_graph(): - x = layers.data(name="x", shape=[245, 30, 30], dtype="float32") + x = paddle.static.data( + name="x", shape=[-1, 245, 30, 30], dtype="float32" + ) out = paddle.strided_slice( x, axes=axes, starts=starts, ends=ends, strides=strides ) @@ -2101,31 +2105,38 @@ class TestBook(LayerTest): def test_sequence_expand(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - x = layers.data(name='x', shape=[10], dtype='float32') - y = layers.data( - name='y', shape=[10, 20], dtype='float32', lod_level=2 + x = paddle.static.data(name='x', shape=[-1, 10], dtype='float32') + y = paddle.static.data( + name='y', shape=[-1, 10, 20], dtype='float32', lod_level=2 ) return layers.sequence_expand(x=x, y=y, ref_level=1) def test_sequence_reshape(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - x = layers.data(name='x', shape=[8], dtype='float32', lod_level=1) + x = paddle.static.data( + name='x', shape=[-1, 8], dtype='float32', lod_level=1 + ) out = layers.sequence_reshape(input=x, new_dim=16) return out def test_sequence_unpad(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - x = layers.data(name='x', shape=[10, 5], dtype='float32') - length = layers.data(name='length', shape=[], dtype='int64') + x = paddle.static.data(name='x', shape=[-1, 10, 5], dtype='float32') + length = paddle.static.data( + name='length', shape=[-1], dtype='int64' + ) return layers.sequence_unpad(x=x, length=length) def test_sequence_softmax(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - seq_data = layers.data( - name='seq_data', shape=[10, 10], dtype='float32', lod_level=1 + seq_data = paddle.static.data( + name='seq_data', + shape=[-1, 10, 10], + dtype='float32', + lod_level=1, ) seq = 
paddle.static.nn.fc(x=seq_data, size=20) return layers.sequence_softmax(seq) @@ -2133,27 +2144,23 @@ class TestBook(LayerTest): def test_sequence_unsqueeze(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - x = layers.data(name='x', shape=[8, 2], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 8, 2], dtype='float32') out = paddle.unsqueeze(x, axis=[1]) return out def test_sequence_scatter(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - x = layers.data( - name='x', shape=[3, 6], append_batch_size=False, dtype='float32' - ) - idx = layers.data( + x = paddle.static.data(name='x', shape=[3, 6], dtype='float32') + idx = paddle.static.data( name='idx', shape=[12, 1], - append_batch_size=False, dtype='int32', lod_level=1, ) - updates = layers.data( + updates = paddle.static.data( name='updates', shape=[12, 1], - append_batch_size=False, dtype='float32', lod_level=1, ) @@ -2165,8 +2172,8 @@ class TestBook(LayerTest): with self.static_graph(): import numpy as np - seqs = layers.data( - name='x', shape=[10, 5], dtype='float32', lod_level=1 + seqs = paddle.static.data( + name='x', shape=[-1, 10, 5], dtype='float32', lod_level=1 ) offset = layers.assign(input=np.array([[0, 1]]).astype('int32')) length = layers.assign(input=np.array([[2, 1]]).astype('int32')) @@ -2178,8 +2185,8 @@ class TestBook(LayerTest): def test_shuffle_batch(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - x = layers.data( - name='X', shape=[4, 50], dtype='float32', lod_level=0 + x = paddle.static.data( + name='X', shape=[-1, 4, 50], dtype='float32', lod_level=0 ) out1 = fluid.contrib.layers.shuffle_batch(x) default_main_program().random_seed = 1000 @@ -2240,21 +2247,25 @@ class TestBook(LayerTest): def test_sequence_enumerate(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - x = layers.data(name="input", shape=[1], dtype='int32', lod_level=1) + x = paddle.static.data( + name="input", shape=[-1, 1], dtype='int32', lod_level=1 + ) out = layers.sequence_enumerate(input=x, win_size=2, pad_value=0) def test_row_conv(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): - x = layers.data(name='x', shape=[16], dtype='float32', lod_level=1) + x = paddle.static.data( + name='x', shape=[-1, 16], dtype='float32', lod_level=1 + ) out = paddle.static.nn.row_conv(input=x, future_context_size=2) return out def test_simple_conv2d(self): # TODO(minqiyang): dygraph do not support layers with param now with self.static_graph(): - images = layers.data( - name='pixel', shape=[3, 48, 48], dtype='float32' + images = paddle.static.data( + name='pixel', shape=[-1, 3, 48, 48], dtype='float32' ) return paddle.static.nn.conv2d( input=images, num_filters=3, filter_size=[4, 4] @@ -2263,16 +2274,17 @@ class TestBook(LayerTest): def test_squeeze(self): # TODO(minqiyang): dygraph do not support layers with param now with self.static_graph(): - x = layers.data(name='x', shape=[1, 1, 4], dtype='float32') + x = paddle.static.data( + name='x', shape=[-1, 1, 1, 4], dtype='float32' + ) out = paddle.squeeze(x, axis=[2]) return out def test_flatten(self): # TODO(minqiyang): dygraph do not support op without kernel now with self.static_graph(): - x = layers.data( + x = paddle.static.data( name='x', - append_batch_size=False, shape=[4, 4, 3], dtype="float32", ) @@ -2288,7 +2300,9 @@ class TestBook(LayerTest): def test_unfold(self): with self.static_graph(): - x = layers.data(name='x', 
shape=[3, 20, 20], dtype='float32') + x = paddle.static.data( + name='x', shape=[-1, 3, 20, 20], dtype='float32' + ) out = paddle.nn.functional.unfold(x, [3, 3], 1, 1, 1) return out @@ -2308,18 +2322,13 @@ class TestBook(LayerTest): with program_guard( fluid.default_main_program(), fluid.default_startup_program() ): - input = layers.data( + input = paddle.static.data( name='input_data', shape=[3, 3], - append_batch_size=False, dtype='float32', ) - x = layers.data( - name='x', shape=[3, 2], append_batch_size=False, dtype='float32' - ) - y = layers.data( - name='y', shape=[2, 3], append_batch_size=False, dtype='float32' - ) + x = paddle.static.data(name='x', shape=[3, 2], dtype='float32') + y = paddle.static.data(name='y', shape=[2, 3], dtype='float32') out = paddle.addmm(input=input, x=x, y=y) return out diff --git a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py index 1ed1a713e30aa3cf3f3fc7a479ea1ec1583a2070..0c1327c5af3a110a6e375c06c2758d1d42ac742d 100644 --- a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py +++ b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py @@ -31,9 +31,9 @@ paddle.enable_static() def run_pserver(use_cuda, sync_mode, ip, port, trainers, trainer_id): remove_ps_flag(os.getpid()) - x = fluid.layers.data(name='x', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') # loss function cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) @@ -68,9 +68,9 @@ def run_pserver_with_empty_block( use_cuda, sync_mode, ip, port, trainers, trainer_id ): remove_ps_flag(os.getpid()) - x = fluid.layers.data(name='x', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, bias_attr=False) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') # loss function cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) diff --git a/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py b/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py index 3b2465a344ee1f96e5d02a6ddfb3a7e38a620aa5..853b364f358ab1be9504cfd31a8483389ef0bbcb 100644 --- a/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py +++ b/python/paddle/fluid/tests/unittests/test_load_vars_shape_check.py @@ -30,7 +30,9 @@ class TestLoadVarsShapeCheck(unittest.TestCase): startup_program_1 = fluid.Program() with fluid.program_guard(program_1, startup_program_1): - input = fluid.layers.data(name="x", shape=[-1, 10], dtype='float32') + input = paddle.static.data( + name="x", shape=[-1, 10], dtype='float32' + ) out = paddle.static.nn.fc(input, 20) place = fluid.CPUPlace() exe = Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py index ec9eee17c0141600651bd4074667042b64a856f9..cc11e96f5a9150502d20d2b2d25872ed8603ba11 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py @@ -16,6 +16,7 @@ import unittest import numpy as np +import paddle import paddle.fluid as fluid import 
paddle.fluid.core as core from paddle import enable_static @@ -233,7 +234,9 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): self.set_initializer() with fluid.program_guard(self.prog, self.startup_prog): - x = fluid.layers.data(name='x', shape=self.ids_shape, dtype='int64') + x = paddle.static.data( + name='x', shape=self.ids_shape, dtype='int64' + ) self.emb = fluid.layers.embedding( input=x, size=self.w_shape, diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py index c022f564809a981ad8dd8268b349f0a068db4606..8cbc6242b3af985fe497f0d3f8b283d24db49175 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py @@ -102,7 +102,9 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): self.set_initializer() with fluid.program_guard(self.prog, self.startup_prog): - x = fluid.layers.data(name='x', shape=self.ids_shape, dtype='int64') + x = paddle.static.data( + name='x', shape=[-1] + self.ids_shape, dtype='int64' + ) self.emb = fluid.input.embedding( input=x, size=self.w_shape, diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py index cea6858e0d4d31aaf3b6f21597bbc8635af63937..eb9c4c60893e06507e9733567b60f9a8aff92e05 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py @@ -200,8 +200,8 @@ class TestLookupTableIsSparse(unittest.TestCase): self.init_data() main_program = fluid.Program() with fluid.program_guard(main_program, fluid.Program()): - x = fluid.layers.data(name='x', shape=[5], dtype='int64') - y_ = fluid.layers.data(name='y_', shape=[5], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 5], dtype='int64') + y_ = paddle.static.data(name='y_', shape=[-1, 5], dtype='float32') emb = fluid.input.embedding( input=x, size=[10, 16], @@ -246,7 +246,7 @@ class TestLookupTableIsSparse(unittest.TestCase): class TestLookupTableApi(unittest.TestCase): def test_api(self): - x = fluid.layers.data(name='x', shape=[20], dtype='int64') + x = paddle.static.data(name='x', shape=[-1, 20], dtype='int64') emb = fluid.embedding(input=x, size=[128, 64]) place = fluid.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch.py b/python/paddle/fluid/tests/unittests/test_math_op_patch.py index 41cd39088f379a000afdf20a556545b93950d224..4bf9933ca39cdf4ecb78b42fcea93dbe81679f88 100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch.py +++ b/python/paddle/fluid/tests/unittests/test_math_op_patch.py @@ -28,7 +28,7 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_add_scalar(self): - a = fluid.layers.data(name="a", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) b = a + 10 ab = fluid.layers.concat(input=[a, b], axis=1) c = ab + 10 @@ -48,7 +48,7 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_radd_scalar(self): - a = fluid.layers.data(name="a", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) b = 10 + a place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -60,7 +60,7 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_sub_scalar(self): - a = fluid.layers.data(name="a", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) b = a - 10 place = fluid.CPUPlace() exe = 
fluid.Executor(place) @@ -72,7 +72,7 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_rsub_scalar(self): - a = fluid.layers.data(name="a", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) b = 10 - a place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -84,7 +84,7 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_mul_scalar(self): - a = fluid.layers.data(name="a", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) b = a * 10 place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -96,7 +96,7 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_rmul_scalar(self): - a = fluid.layers.data(name="a", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) b = 10 * a place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -108,7 +108,7 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_div_scalar(self): - a = fluid.layers.data(name="a", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) b = a / 10 place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -120,7 +120,7 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_rdiv_scalar(self): - a = fluid.layers.data(name="a", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) b = 10 / a place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -133,8 +133,8 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_div_two_tensor(self): - a = fluid.layers.data(name="a", shape=[1]) - b = fluid.layers.data(name="b", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) + b = paddle.static.data(name="b", shape=[-1, 1]) c = a / b place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -149,8 +149,8 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_mul_two_tensor(self): - a = fluid.layers.data(name="a", shape=[1]) - b = fluid.layers.data(name="b", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) + b = paddle.static.data(name="b", shape=[-1, 1]) c = a * b place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -165,8 +165,8 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_add_two_tensor(self): - a = fluid.layers.data(name="a", shape=[1]) - b = fluid.layers.data(name="b", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) + b = paddle.static.data(name="b", shape=[-1, 1]) c = a + b place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -181,8 +181,8 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_sub_two_tensor(self): - a = fluid.layers.data(name="a", shape=[1]) - b = fluid.layers.data(name="b", shape=[1]) + a = paddle.static.data(name="a", shape=[-1, 1]) + b = paddle.static.data(name="b", shape=[-1, 1]) c = a - b place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -197,7 +197,7 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_integer_div(self): - a = fluid.layers.data(name="a", shape=[1], dtype='int64') + a = paddle.static.data(name="a", shape=[-1, 1], dtype='int64') b = a / 7 place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -211,8 +211,8 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_equal(self): - a = fluid.layers.data(name="a", shape=[1], dtype='float32') - b = fluid.layers.data(name="b", shape=[1], dtype='float32') + a = paddle.static.data(name="a", shape=[-1, 1], dtype='float32') + b = paddle.static.data(name="b", shape=[-1, 1], dtype='float32') c = a == b place = fluid.CPUPlace() @@ -231,9 +231,10 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() 
def test_equal_and_cond(self): - a = fluid.layers.data(name="a", shape=[1], dtype='float32') - b = fluid.layers.data(name="b", shape=[1], dtype='float32') - + a = paddle.static.data(name="a", shape=[-1, 1], dtype='float32') + a.desc.set_need_check_feed(False) + b = paddle.static.data(name="b", shape=[-1, 1], dtype='float32') + b.desc.set_need_check_feed(False) one = paddle.ones(shape=[1], dtype='int32') zero = fluid.layers.zeros(shape=[1], dtype='int32') cond = one == zero @@ -241,8 +242,8 @@ class TestMathOpPatches(unittest.TestCase): place = fluid.CPUPlace() exe = fluid.Executor(place) - a_np = np.array([3, 4, 10, 14, 9, 18]).astype('float') - b_np = np.array([3, 4, 11, 15, 8, 18]).astype('float') + a_np = np.array([3, 4, 10, 14, 9, 18]).astype('float32') + b_np = np.array([3, 4, 11, 15, 8, 18]).astype('float32') (c_np,) = exe.run( fluid.default_main_program(), feed={"a": a_np, "b": b_np}, @@ -253,7 +254,8 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_neg(self): - a = fluid.layers.data(name="a", shape=[10, 1]) + a = paddle.static.data(name="a", shape=[-1, 10, 1], dtype='float32') + a.desc.set_need_check_feed(False) b = -a place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -266,7 +268,8 @@ class TestMathOpPatches(unittest.TestCase): @prog_scope() def test_astype(self): - a = fluid.layers.data(name="a", shape=[10, 1]) + a = paddle.static.data(name="a", shape=[-1, 10, 1]) + a.desc.set_need_check_feed(False) b = a.astype('float32') place = fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_mean_op.py b/python/paddle/fluid/tests/unittests/test_mean_op.py index c6ade1ca53d0f0515acec4aea1dccb623f8373c8..3fcd423aa56c532128c9f43e90bb9d97af102c52 100644 --- a/python/paddle/fluid/tests/unittests/test_mean_op.py +++ b/python/paddle/fluid/tests/unittests/test_mean_op.py @@ -23,7 +23,6 @@ from test_sum_op import TestReduceOPTensorAxisBase import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard np.random.seed(10) @@ -82,12 +81,12 @@ class TestMeanOpError(unittest.TestCase): input1 = 12 self.assertRaises(TypeError, paddle.mean, input1) # The input dtype of mean_op must be float16, float32, float64. 
- input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32" + input2 = paddle.static.data( + name='input2', shape=[-1, 12, 10], dtype="int32" ) self.assertRaises(TypeError, paddle.mean, input2) - input3 = fluid.layers.data( - name='input3', shape=[4], dtype="float16" + input3 = paddle.static.data( + name='input3', shape=[-1, 4], dtype="float16" ) paddle.nn.functional.softmax(input3) @@ -442,7 +441,7 @@ class TestMeanDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [3, 4, 5], False, dtype) + data = paddle.static.data('data', [3, 4, 5], dtype) data.persistable = True out = paddle.mean(data) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -473,7 +472,7 @@ class TestMeanTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [3, 4, 5], False, dtype) + data = paddle.static.data('data', [3, 4, 5], dtype) data.persistable = True out = paddle.mean(data) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py b/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py index bdd6b3d30489937fcb5fe32667c65c5fc635c70a..32ce652ace45ab6ecf774cfa20fc716678055bf6 100644 --- a/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py +++ b/python/paddle/fluid/tests/unittests/test_memory_reuse_exclude_feed_var.py @@ -27,8 +27,8 @@ class TestMemoryReuseExcludeFeedVar(unittest.TestCase): self.iteration = 10 def main_impl(self, place): - image = fluid.layers.data( - name='image', shape=self.image_shape, dtype='float32' + image = paddle.static.data( + name='image', shape=[-1] + self.image_shape, dtype='float32' ) relu_image = F.relu(image) loss = paddle.mean(relu_image) diff --git a/python/paddle/fluid/tests/unittests/test_memory_usage.py b/python/paddle/fluid/tests/unittests/test_memory_usage.py index b083bc6d3051b80e5cf0053a76e062dae785d2a1..a9b2d2a92b86fa8fe14844cd86388e852cc507aa 100644 --- a/python/paddle/fluid/tests/unittests/test_memory_usage.py +++ b/python/paddle/fluid/tests/unittests/test_memory_usage.py @@ -26,9 +26,9 @@ def train_simulator(test_batch_size=10): "but got batch_size={}".format(test_batch_size) ) - x = fluid.layers.data(name='x', shape=[13], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py b/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py index 20bf8e0985aa3cc3b2ed3f002af864024dc91d4d..6a1c6af11fd026e45eb479f09022912984ae5aad 100644 --- a/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py +++ b/python/paddle/fluid/tests/unittests/test_mix_precision_all_reduce_fuse.py @@ -36,8 +36,10 @@ def loss_net(hidden, label): def conv_net(use_feed): - img = fluid.layers.data(name='image', shape=img_shape, dtype='float16') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data( + name='image', shape=[-1] + img_shape, dtype='float16' + ) + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') conv_pool_1 = fluid.nets.simple_img_conv_pool( input=img, 
diff --git a/python/paddle/fluid/tests/unittests/test_momentum_op.py b/python/paddle/fluid/tests/unittests/test_momentum_op.py index 4b745cb7a6fb0c682332b04811a26612fc3566e8..6ec02735d37d258eecbfa3b5d248a2fe3b8689b0 100644 --- a/python/paddle/fluid/tests/unittests/test_momentum_op.py +++ b/python/paddle/fluid/tests/unittests/test_momentum_op.py @@ -528,8 +528,8 @@ class TestMomentumV2(unittest.TestCase): place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) cost = paddle.nn.functional.square_error_cost( input=y_predict, label=y @@ -666,8 +666,8 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase): place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1, activation=None) cost = paddle.nn.functional.square_error_cost( input=y_predict, label=y diff --git a/python/paddle/fluid/tests/unittests/test_monitor.py b/python/paddle/fluid/tests/unittests/test_monitor.py index 205414668f9f0cdd2048d1aceade78e594c54d12..a5d5e30176fb07753a1ec9e47c4031fccf825b92 100644 --- a/python/paddle/fluid/tests/unittests/test_monitor.py +++ b/python/paddle/fluid/tests/unittests/test_monitor.py @@ -54,8 +54,8 @@ class TestDatasetWithStat(unittest.TestCase): slots = ["slot1", "slot2", "slot3", "slot4"] slots_vars = [] for slot in slots: - var = fluid.layers.data( - name=slot, shape=[1], dtype="int64", lod_level=1 + var = paddle.static.data( + name=slot, shape=[-1, 1], dtype="int64", lod_level=1 ) slots_vars.append(var) diff --git a/python/paddle/fluid/tests/unittests/test_mse_loss.py b/python/paddle/fluid/tests/unittests/test_mse_loss.py index 99fbe012953f3dbd35696c7927df45deb155d5c7..75dafb1ea3553a8cadf7127427956194cdffd285 100644 --- a/python/paddle/fluid/tests/unittests/test_mse_loss.py +++ b/python/paddle/fluid/tests/unittests/test_mse_loss.py @@ -79,12 +79,14 @@ class TestNNMseLoss(unittest.TestCase): else fluid.CPUPlace() ) with fluid.program_guard(prog, startup_prog): - input = fluid.layers.data( - name='input', shape=dim, dtype='float32' + input = paddle.static.data( + name='input', shape=[-1] + dim, dtype='float32' ) - label = fluid.layers.data( - name='label', shape=dim, dtype='float32' + input.desc.set_need_check_feed(False) + label = paddle.static.data( + name='label', shape=[-1] + dim, dtype='float32' ) + label.desc.set_need_check_feed(False) mse_loss = paddle.nn.loss.MSELoss() ret = mse_loss(input, label) @@ -123,12 +125,14 @@ class TestNNMseLoss(unittest.TestCase): else fluid.CPUPlace() ) with fluid.program_guard(prog, startup_prog): - input = fluid.layers.data( - name='input', shape=dim, dtype='float32' + input = paddle.static.data( + name='input', shape=[-1] + dim, dtype='float32' ) - label = fluid.layers.data( - name='label', shape=dim, dtype='float32' + input.desc.set_need_check_feed(False) + label = paddle.static.data( + name='label', shape=[-1] + dim, dtype='float32' ) + label.desc.set_need_check_feed(False) mse_loss = 
paddle.nn.loss.MSELoss(reduction='sum') ret = mse_loss(input, label) @@ -167,12 +171,14 @@ class TestNNMseLoss(unittest.TestCase): else fluid.CPUPlace() ) with fluid.program_guard(prog, startup_prog): - input = fluid.layers.data( - name='input', shape=dim, dtype='float32' + input = paddle.static.data( + name='input', shape=[-1] + dim, dtype='float32' ) - label = fluid.layers.data( - name='label', shape=dim, dtype='float32' + input.desc.set_need_check_feed(False) + label = paddle.static.data( + name='label', shape=[-1] + dim, dtype='float32' ) + label.desc.set_need_check_feed(False) mse_loss = paddle.nn.loss.MSELoss(reduction='none') ret = mse_loss(input, label) diff --git a/python/paddle/fluid/tests/unittests/test_multihead_attention.py b/python/paddle/fluid/tests/unittests/test_multihead_attention.py index 9d94a7713b1f6e8dc05ddc06c12a5a9391bdcd1e..2db080edbfd9b35d412ef7c0c3d829ce5b04f877 100644 --- a/python/paddle/fluid/tests/unittests/test_multihead_attention.py +++ b/python/paddle/fluid/tests/unittests/test_multihead_attention.py @@ -31,18 +31,16 @@ class TestMultiheadAttention(unittest.TestCase): def set_program(self): """Build the test program.""" - queries = fluid.layers.data( + queries = paddle.static.data( name="queries", shape=self.input_shape, dtype="float32", - append_batch_size=False, ) queries.stop_gradient = False - keys = fluid.layers.data( + keys = paddle.static.data( name="keys", shape=self.input_shape, dtype="float32", - append_batch_size=False, ) keys.stop_gradient = False diff --git a/python/paddle/fluid/tests/unittests/test_name_scope.py b/python/paddle/fluid/tests/unittests/test_name_scope.py index 372547722cb1878c331a441e95ccd51e62c2486e..daecd21ab7af819ca9a6c7f0f055299a1e3e00d5 100644 --- a/python/paddle/fluid/tests/unittests/test_name_scope.py +++ b/python/paddle/fluid/tests/unittests/test_name_scope.py @@ -21,7 +21,7 @@ import paddle.fluid as fluid class TestNameScope(unittest.TestCase): def test_name_scope(self): with fluid.name_scope("s1"): - a = fluid.layers.data(name='data', shape=[1], dtype='int32') + a = paddle.static.data(name='data', shape=[-1, 1], dtype='int32') b = a + 1 with fluid.name_scope("s2"): c = b * 1 diff --git a/python/paddle/fluid/tests/unittests/test_nce.py b/python/paddle/fluid/tests/unittests/test_nce.py index ee51b0d608ad3fa283e14dcdfaf4d74efad49163..e2923da7113df8035429bad5c4cffedbf85ccb74 100644 --- a/python/paddle/fluid/tests/unittests/test_nce.py +++ b/python/paddle/fluid/tests/unittests/test_nce.py @@ -187,8 +187,10 @@ class TestNCECase1SelectedRows(unittest.TestCase): custom_dist, is_sparse, ): - input = fluid.layers.data(name="input", shape=[10], dtype="float32") - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + input = paddle.static.data( + name="input", shape=[-1, 10], dtype="float32" + ) + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") w_param = ( fluid.default_main_program() @@ -288,7 +290,7 @@ class TestNCE_OpError(unittest.TestCase): input1 = fluid.create_lod_tensor( np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace() ) - label1 = fluid.layers.data( + label1 = paddle.static.data( name='label1', shape=[-1, 4], dtype="int64" ) # the input(input) of nce layer must be Variable. 
@@ -296,7 +298,7 @@ class TestNCE_OpError(unittest.TestCase): TypeError, paddle.static.nn.nce, input1, label1, 5 ) - input2 = fluid.layers.data( + input2 = paddle.static.data( name='input2', shape=[-1, 4], dtype="float32" ) label2 = fluid.create_lod_tensor( @@ -307,10 +309,10 @@ class TestNCE_OpError(unittest.TestCase): TypeError, paddle.static.nn.nce, input2, label2, 5 ) - input3 = fluid.layers.data( + input3 = paddle.static.data( name='input3', shape=[-1, 4], dtype="float16" ) - label3 = fluid.layers.data( + label3 = paddle.static.data( name='label3', shape=[-1, 1], dtype="int64" ) # the data type of input(input) must be float32 or float64. @@ -318,10 +320,10 @@ class TestNCE_OpError(unittest.TestCase): TypeError, paddle.static.nn.nce, input3, label3, 5 ) - input4 = fluid.layers.data( + input4 = paddle.static.data( name='input4', shape=[-1, 4], dtype="float32" ) - label4 = fluid.layers.data( + label4 = paddle.static.data( name='label4', shape=[-1, 1], dtype="int32" ) # the data type of input(label) must be int64. diff --git a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py index cdde0c1a468ec6960377dc8a448f919fb020428e..19aa2b72198b9bbd393bd34397aff4b43e09e0bf 100644 --- a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py +++ b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py @@ -30,8 +30,8 @@ class TestNetWithDtype(unittest.TestCase): main = fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): - x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype) - y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype) + x = paddle.static.data(name='x', shape=[-1, 13], dtype=self.dtype) + y = paddle.static.data(name='y', shape=[-1, 1], dtype=self.dtype) y_predict = paddle.static.nn.fc(x, size=1, activation=None) cost = paddle.nn.functional.square_error_cost( input=y_predict, label=y diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py index 1051ef8d4b00db5c261dd2c1def58a13b983b9c7..d89af631baa45b8829e9f38618acb1229aef02ba 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py @@ -16,6 +16,7 @@ import unittest import numpy as np +import paddle import paddle.fluid as fluid import paddle.nn.functional as functional @@ -41,10 +42,9 @@ class EmbeddingStatic(unittest.TestCase): (128, 100), attr=param_attr, dtype="float32" ) - label = fluid.layers.data( + label = paddle.static.data( name="label", - shape=[4], - append_batch_size=False, + shape=[-1, 4], dtype="int64", ) @@ -74,10 +74,9 @@ class EmbeddingStatic(unittest.TestCase): (128, 100), attr=param_attr, dtype="float32" ) - label = fluid.layers.data( + label = paddle.static.data( name="label", - shape=[4], - append_batch_size=False, + shape=[-1, 4], dtype="int32", ) diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py b/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py index 0faf226fac4d4f198b958405964836da0bf8e992..0043f15aed3a8c15d88aaa93b03657e4e2c2363c 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py @@ -134,9 +134,13 @@ class TestOneHotOp_exception(unittest.TestCase): def test_check_output(self): program = Program() with 
program_guard(program): - x = fluid.layers.data( - name='x', shape=[self.dimension], dtype='float32', lod_level=1 + x = paddle.static.data( + name='x', + shape=[-1, self.dimension], + dtype='float32', + lod_level=1, ) + x.desc.set_need_check_feed(False) block = program.current_block() one_hot_out = block.create_var( name="one_hot_out", @@ -181,7 +185,8 @@ class TestOneHotOpApi(unittest.TestCase): ) def _run(self, num_classes): - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + label.desc.set_need_check_feed(False) one_hot_label = functional.one_hot(x=label, num_classes=num_classes) place = fluid.CPUPlace() @@ -205,12 +210,12 @@ class BadInputTestOnehotV2(unittest.TestCase): with fluid.program_guard(fluid.Program()): def test_bad_x(): - label = fluid.layers.data( + label = paddle.static.data( name="label", shape=[4], - append_batch_size=False, dtype="float32", ) + label.desc.set_need_check_feed(False) one_hot_label = functional.one_hot(x=label, num_classes=4) self.assertRaises(TypeError, test_bad_x) diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py index ec355baefa9f67d96312c9d87da62086da8f1a8e..b86d3029ddd5a35ec1c4d50516b031bf4a2b224b 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py @@ -21,7 +21,6 @@ from decorator_helper import prog_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers paddle.enable_static() @@ -73,7 +72,7 @@ class TestReduceMeanWithDimDoubleGradCheck(unittest.TestCase): eps = 0.05 dtype = np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) x.persistable = True y = paddle.mean(x, axis=0) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -97,7 +96,7 @@ class TestReduceSumWithDimDoubleGradCheck(unittest.TestCase): eps = 0.05 dtype = np.float64 - x = layers.data('x', shape, False, dtype) + x = paddle.static.data('x', shape, dtype) x.persistable = True y = paddle.sum(x, axis=0) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) @@ -122,7 +121,7 @@ class TestReshapeDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', x_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) x.persistable = True out = paddle.reshape(x, new_shape) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) @@ -150,7 +149,7 @@ class TestTileDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', x_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) x.persistable = True out = paddle.tile(x, repeat_times) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) @@ -181,7 +180,7 @@ class TestExpandV2DoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', x_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) x.persistable = True out = paddle.expand(x, new_shape) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) @@ -213,7 +212,7 @@ class TestSqueezeDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', x_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) x.persistable = True out = paddle.squeeze(x, axes) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) @@ -245,7 +244,7 @@ class TestUnsqueezeDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = 
np.float64 - x = layers.data('x', x_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) x.persistable = True out = paddle.unsqueeze(x, axes) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) @@ -274,7 +273,7 @@ class TestClipDoubleGradCheck(unittest.TestCase): x_shape = [2, 4, 10] dtype = np.float64 - x = layers.data('x', x_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) x.persistable = True out = paddle.clip(x, min=-1.0, max=1.0) x_arr = np.random.uniform(-5.0, 5.0, x_shape).astype(dtype) @@ -299,7 +298,7 @@ class TestTransposeDoubleGradCheck(unittest.TestCase): perm = [1, 0] dtype = np.float64 - x = layers.data('x', x_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) x.persistable = True out = paddle.transpose(x, perm) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) @@ -321,7 +320,7 @@ class TestTransposeDoubleGradCheckCase1(unittest.TestCase): perm = [0, 2, 3, 1] dtype = np.float64 - x = layers.data('x', x_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) x.persistable = True out = paddle.transpose(x, perm) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) @@ -348,7 +347,7 @@ class TestConstantPadDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float64 - x = layers.data('x', x_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) x.persistable = True out = paddle.nn.functional.pad(x, pad) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) @@ -375,7 +374,7 @@ class TestConstantPadDoubleGradCheckCase1(TestConstantPadDoubleGradCheck): pad = [1, 0, 1, 0, 1, 0, 1, 0] dtype = np.float64 - x = layers.data('x', x_shape, False, dtype) + x = paddle.static.data('x', x_shape, dtype) x.persistable = True out = paddle.nn.functional.pad(x, pad) x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype) @@ -393,8 +392,8 @@ class TestConcatDoubleGradCheck(unittest.TestCase): pad = [1, 1, 1, 1] dtype = np.float64 - x1 = layers.data('x', x_shape, False, dtype) - x2 = layers.data('x', x_shape, False, dtype) + x1 = paddle.static.data('x', x_shape, dtype) + x2 = paddle.static.data('x', x_shape, dtype) x1.persistable = True x2.persistable = True out = paddle.concat([x1, x2], axis=0) @@ -423,10 +422,9 @@ class TestConcatDoubleGradCheck(unittest.TestCase): class TestAvgPool2DDoubleGradCheckCase1(unittest.TestCase): @prog_scope() def func(self, place): - input_NCHW = fluid.layers.data( + input_NCHW = paddle.static.data( name="input_NCHW", shape=[2, 3, 5, 5], - append_batch_size=False, dtype="float32", ) @@ -454,10 +452,9 @@ class TestAvgPool2DDoubleGradCheckCase2(unittest.TestCase): @prog_scope() def func(self, place): - input_NHWC = fluid.layers.data( + input_NHWC = paddle.static.data( name="input_NHWC", shape=[2, 5, 5, 3], - append_batch_size=False, dtype="float32", ) @@ -491,10 +488,9 @@ class TestAvgPool2DDoubleGradCheckCase3(unittest.TestCase): @prog_scope() def func(self, place): - input_NCHW = fluid.layers.data( + input_NCHW = paddle.static.data( name="input_NCHW", shape=[2, 3, 5, 5], - append_batch_size=False, dtype="float32", ) @@ -525,10 +521,9 @@ class TestAvgPool2DDoubleGradCheckCase4(unittest.TestCase): @prog_scope() def func(self, place): - input_NCHW = fluid.layers.data( + input_NCHW = paddle.static.data( name="input_NCHW", shape=[2, 3, 5, 5], - append_batch_size=False, dtype="float32", ) diff --git a/python/paddle/fluid/tests/unittests/test_nonzero_api.py b/python/paddle/fluid/tests/unittests/test_nonzero_api.py index 
dce29b96e5559653cc72ddbfcfee35ca35bfc474..aac65fba54c8526569881ae1b8c019116a8074c9 100644 --- a/python/paddle/fluid/tests/unittests/test_nonzero_api.py +++ b/python/paddle/fluid/tests/unittests/test_nonzero_api.py @@ -25,7 +25,8 @@ class TestNonZeroAPI(unittest.TestCase): def test_nonzero_api_as_tuple(self): data = np.array([[True, False], [False, True]]) with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 2]) + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + x.desc.set_need_check_feed(False) y = paddle.nonzero(x, as_tuple=True) self.assertEqual(type(y), tuple) self.assertEqual(len(y), 2) @@ -40,7 +41,8 @@ class TestNonZeroAPI(unittest.TestCase): data = np.array([True, True, False]) with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1]) + x = paddle.static.data(name='x', shape=[-1], dtype='float32') + x.desc.set_need_check_feed(False) y = paddle.nonzero(x, as_tuple=True) self.assertEqual(type(y), tuple) self.assertEqual(len(y), 1) @@ -55,7 +57,8 @@ class TestNonZeroAPI(unittest.TestCase): def test_nonzero_api(self): data = np.array([[True, False], [False, True]]) with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 2]) + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + x.desc.set_need_check_feed(False) y = paddle.nonzero(x) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( @@ -66,7 +69,8 @@ class TestNonZeroAPI(unittest.TestCase): data = np.array([True, True, False]) with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1]) + x = paddle.static.data(name='x', shape=[-1], dtype='float32') + x.desc.set_need_check_feed(False) y = paddle.nonzero(x) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( diff --git a/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py b/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py index 60cc8c7665edb9536d012bcb179fc1b20e01c60c..8ef60fce49806c3bf9c7c44c09f2cc2ec508ef5b 100644 --- a/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py +++ b/python/paddle/fluid/tests/unittests/test_normalization_wrapper.py @@ -32,11 +32,10 @@ class TestNormalization(unittest.TestCase): def set_program(self, axis, epsilon): """Build the test program.""" - data = fluid.layers.data( + data = paddle.static.data( name=self.data_desc["name"], shape=self.data_desc["shape"], dtype="float32", - append_batch_size=False, ) data.stop_gradient = False l2_norm = paddle.nn.functional.normalize( diff --git a/python/paddle/fluid/tests/unittests/test_npair_loss_op.py b/python/paddle/fluid/tests/unittests/test_npair_loss_op.py index efbf8a98a87b8d761d4f0962162521286889f4a3..841d7acc2c0864cdf703cd34b8e43896e168584a 100644 --- a/python/paddle/fluid/tests/unittests/test_npair_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_npair_loss_op.py @@ -83,23 +83,20 @@ class TestNpairLossOp(unittest.TestCase): l2_reg=reg_lambda, ) - anc = fluid.layers.data( + anc = paddle.static.data( dtype='float32', name='anc', shape=embeddings_anchor.shape, - append_batch_size=False, ) - pos = fluid.layers.data( + pos = paddle.static.data( dtype='float32', name='pos', shape=embeddings_positive.shape, - append_batch_size=False, ) - lab = fluid.layers.data( + lab = paddle.static.data( dtype='float32', name='lab', shape=row_labels.shape, - append_batch_size=False, ) npair_loss_op = paddle.nn.functional.npair_loss( diff --git a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py 
b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py index 53e34ae70e6739c8c6228bd73f1e02e777a7c6fc..30bb75e0fa7833ef213a9c9547fea0761aab940c 100644 --- a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py @@ -133,9 +133,13 @@ class TestOneHotOp_exception(unittest.TestCase): def test_check_output(self): program = Program() with program_guard(program): - x = fluid.layers.data( - name='x', shape=[self.dimension], dtype='float32', lod_level=1 + x = paddle.static.data( + name='x', + shape=[-1, self.dimension], + dtype='float32', + lod_level=1, ) + x.desc.set_need_check_feed(False) block = program.current_block() one_hot_out = block.create_var( name="one_hot_out", @@ -187,7 +191,8 @@ class TestOneHotOpApi(unittest.TestCase): ) def _run(self, depth): - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + label.desc.set_need_check_feed(False) one_hot_label = fluid.one_hot(input=label, depth=depth) place = fluid.CPUPlace() @@ -211,12 +216,12 @@ class BadInputTestOnehotV2(unittest.TestCase): with fluid.program_guard(fluid.Program()): def test_bad_x(): - label = fluid.layers.data( + label = paddle.static.data( name="label", - shape=[4], - append_batch_size=False, + shape=[-1, 4], dtype="float32", ) + label.desc.set_need_check_feed(False) one_hot_label = fluid.one_hot(input=label, depth=4) self.assertRaises(TypeError, test_bad_x) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index d5f54d44829525474fdc9e365435d091e4ddfcca..20a4a7705e9aecacacbd413d42403f02d68626dc 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -1181,10 +1181,12 @@ class TestRecomputeOptimizer(unittest.TestCase): scope = fluid.Scope() with fluid.scope_guard(scope): with program_guard(main_program, startup_program): - input_x = fluid.layers.data( - name="x", shape=[3], dtype='float32' + input_x = paddle.static.data( + name="x", shape=[-1, 3], dtype='float32' + ) + input_y = paddle.static.data( + name="y", shape=[-1, 1], dtype='int64' ) - input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') drop_res, prediction, cost = mlp(input_x, input_y) sgd = fluid.optimizer.Adam(learning_rate=0.01) sgd = fluid.optimizer.RecomputeOptimizer(sgd) @@ -1243,10 +1245,12 @@ class TestRecomputeOptimizerCUDA(unittest.TestCase): scope = fluid.Scope() with fluid.scope_guard(scope): with program_guard(main_program, startup_program): - input_x = fluid.layers.data( - name="x", shape=[3], dtype='float32' + input_x = paddle.static.data( + name="x", shape=[-1, 3], dtype='float32' + ) + input_y = paddle.static.data( + name="y", shape=[-1, 1], dtype='int64' ) - input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') drop_res, prediction, cost = mlp(input_x, input_y) sgd = fluid.optimizer.Adam(learning_rate=0.01) sgd = fluid.optimizer.RecomputeOptimizer(sgd) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py index 731693f5cf7fc47f996236349bc6fb0f670b412f..3b32c9ca4ee7886dac1aa0f90786227b2d57507e 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py @@ -264,7 +264,7 @@ class 
TestMultiOptimizersMultiCardsError(unittest.TestCase): def fn_2(opt, avg_loss): opt.minimize(avg_loss) - x = fluid.layers.data("X", [10], 'float32') + x = paddle.static.data("X", [-1, 10], 'float32') hidden = paddle.static.nn.fc(x, 5) avg_loss = paddle.mean(hidden) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py index 8654f0ba5e97c0e86c1242003191bd80045f28b9..0984b02d750678c6ba904527a19f2edcf164a2d6 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_drop_scope.py @@ -31,7 +31,7 @@ class TestParallelExecutorDropExeScope(unittest.TestCase): train_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): - data = fluid.layers.data(name='X', shape=[1], dtype='float32') + data = paddle.static.data(name='X', shape=[-1, 1], dtype='float32') hidden = paddle.static.nn.fc(x=data, size=10) loss = paddle.mean(hidden) test_program = fluid.default_main_program().clone(for_test=True) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py index 93fecfefb5a4475f24dc941bff7a3ae12cf39b2b..0d750ddcbe4c2660f303f6d262fd9e493ac242c1 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py @@ -76,8 +76,8 @@ class TestMNISTDryRun(TestBase): @staticmethod def network_func(): - img = fluid.layers.data(name='img', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data(name='img', shape=[-1, 784], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') hidden = img for _ in range(10): hidden = paddle.static.nn.fc(x=img, size=200, activation='tanh') diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py index c92d3234597f7d73cb39fa533c242c5b8421a526..5aa87bc7b06982d85115836d873e8c819a89e146 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py @@ -55,10 +55,12 @@ class TestFetchAndFeed(unittest.TestCase): startup = fluid.Program() startup.random_seed = seed with fluid.program_guard(main_program, startup): - data = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32' + data = paddle.static.data( + name='image', shape=[-1, 3, 224, 224], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = Lenet(data, class_dim=102) loss = paddle.nn.functional.cross_entropy( input=out, label=label, reduction='none', use_softmax=False diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py index d81586fe251c34aa319b91759e5136550e1dd583..850ddc379c6092738c1deb7197d9a130258b52c9 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py @@ -24,8 +24,8 @@ import paddle.fluid.core as core def simple_fc_net(use_feed): - img = 
fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data(name='image', shape=[-1, 784], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') hidden = img for _ in range(4): hidden = paddle.static.nn.fc( @@ -45,8 +45,8 @@ def simple_fc_net(use_feed): def fc_with_batchnorm(use_feed): - img = fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data(name='image', shape=[-1, 784], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') hidden = img for _ in range(1): diff --git a/python/paddle/fluid/tests/unittests/test_print_op.py b/python/paddle/fluid/tests/unittests/test_print_op.py index 75e8e93987dd5c77bc1af8ab0f7d5b33976cbaa1..19be1c7526701f912f96efaab84b3b979652439e 100755 --- a/python/paddle/fluid/tests/unittests/test_print_op.py +++ b/python/paddle/fluid/tests/unittests/test_print_op.py @@ -19,7 +19,6 @@ from simple_nets import init_data, simple_fc_net import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core from paddle.fluid.framework import switch_main_program from paddle.static import Program, program_guard @@ -36,7 +35,7 @@ class TestPrintOpCPU(unittest.TestCase): self.x_tensor.set_recursive_sequence_lengths([[1, 1]]) def build_network(self, only_forward, **kargs): - x = layers.data('x', shape=[3], dtype='float32', lod_level=1) + x = paddle.static.data('x', shape=[-1, 3], dtype='float32', lod_level=1) x.stop_gradient = False paddle.static.Print(input=x, **kargs) loss = paddle.mean(x) @@ -60,7 +59,7 @@ class TestPrintOpCPU(unittest.TestCase): ) def test_all_parameters(self): - x = layers.data('x', shape=[3], dtype='float32', lod_level=1) + x = paddle.static.data('x', shape=[-1, 3], dtype='float32', lod_level=1) x.stop_gradient = False for print_tensor_name in [True, False]: diff --git a/python/paddle/fluid/tests/unittests/test_profiler.py b/python/paddle/fluid/tests/unittests/test_profiler.py index b20538cad5b6679ad49c2a3f9357bac420b9a812..e22deb5df86df3ba6417178267258deebdce3269 100644 --- a/python/paddle/fluid/tests/unittests/test_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_profiler.py @@ -37,7 +37,9 @@ class TestProfiler(unittest.TestCase): startup_program = fluid.Program() main_program = fluid.Program() with fluid.program_guard(main_program, startup_program): - image = fluid.layers.data(name='x', shape=[784], dtype='float32') + image = paddle.static.data( + name='x', shape=[-1, 784], dtype='float32' + ) hidden1 = paddle.static.nn.fc(x=image, size=64, activation='relu') i = layers.zeros(shape=[1], dtype='int64') counter = fluid.layers.zeros( @@ -62,7 +64,7 @@ class TestProfiler(unittest.TestCase): predict = paddle.static.nn.fc( x=hidden2, size=10, activation='softmax' ) - label = fluid.layers.data(name='y', shape=[1], dtype='int64') + label = paddle.static.data(name='y', shape=[-1, 1], dtype='int64') cost = paddle.nn.functional.cross_entropy( input=predict, label=label, reduction='none', use_softmax=False ) diff --git a/python/paddle/fluid/tests/unittests/test_program.py b/python/paddle/fluid/tests/unittests/test_program.py index 56b8b35234d8b145ff1c1106345e029e15198573..dfc9b7572da4bf5266403143932db23d950cf25c 100644 --- a/python/paddle/fluid/tests/unittests/test_program.py +++ b/python/paddle/fluid/tests/unittests/test_program.py @@ -16,7 +16,6 
@@ import unittest import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid.framework import Program, default_main_program, program_guard paddle.enable_static() @@ -98,7 +97,7 @@ class TestProgram(unittest.TestCase): main_program = Program() startup_program = Program() with program_guard(main_program, startup_program): - d = layers.data(name='x', shape=[784], dtype='float32') + d = paddle.static.data(name='x', shape=[-1, 784], dtype='float32') hidden = paddle.static.nn.fc(x=d, size=100) paddle.static.nn.fc(x=hidden, size=100) diff --git a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py index 337feb16174cbfd358d2ae87e2d4daf156f8acd2..5364dcaa6e14a3574fa1d776f9d9706c613bc542 100755 --- a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py +++ b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py @@ -30,8 +30,8 @@ import paddle.fluid.core as core def simple_fc_net_with_accuracy(use_feed): - img = fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data(name='image', shape=[-1, 784], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') hidden = img for _ in range(4): @@ -53,12 +53,12 @@ def simple_fc_net_with_accuracy(use_feed): def cond_net(use_feed=None): - x = fluid.layers.data(name="x", shape=[4], dtype='float32') - label = fluid.layers.data('label', shape=[1], dtype='int64') + x = paddle.static.data(name="x", shape=[-1, 4], dtype='float32') + label = paddle.static.data('label', shape=[-1, 1], dtype='int64') prediction = paddle.static.nn.fc(x, size=1, activation=None) def loss1(pred, label): - x = fluid.layers.data(name="x", shape=[4], dtype='float32') + x = paddle.static.data(name="x", shape=[-1, 4], dtype='float32') loss = paddle.nn.functional.cross_entropy( input=pred, label=label, reduction='none', use_softmax=False ) @@ -82,12 +82,12 @@ def cond_net(use_feed=None): def optimization_in_cond_net(with_optimize=False): - x = fluid.layers.data(name="x", shape=[4], dtype='float32') - label = fluid.layers.data('label', shape=[1], dtype='int64') + x = paddle.static.data(name="x", shape=[-1, 4], dtype='float32') + label = paddle.static.data('label', shape=[-1, 1], dtype='int64') prediction = paddle.static.nn.fc(x, size=1, activation=None) def loss1(opt, pred, label, with_optimize): - x = fluid.layers.data(name="x", shape=[4], dtype='float32') + x = paddle.static.data(name="x", shape=[-1, 4], dtype='float32') loss = paddle.nn.functional.cross_entropy( input=pred, label=label, reduction='none', use_softmax=False ) diff --git a/python/paddle/fluid/tests/unittests/test_program_to_string.py b/python/paddle/fluid/tests/unittests/test_program_to_string.py index 55f32b687d914726a57b82106619353b8183cd11..04a816a54125f1037b8a4e4dd7309134fbb78da1 100644 --- a/python/paddle/fluid/tests/unittests/test_program_to_string.py +++ b/python/paddle/fluid/tests/unittests/test_program_to_string.py @@ -21,9 +21,7 @@ import paddle.fluid as fluid class TestProgram(unittest.TestCase): def test_program_to_string(self): prog = fluid.default_main_program() - a = fluid.layers.data( - name="X", shape=[2, 3], dtype="float32", append_batch_size=False - ) + a = paddle.static.data(name="X", shape=[2, 3], dtype="float32") c = paddle.static.nn.fc(a, size=3) prog_string = prog.to_string(throw_on_error=True, with_details=False) 
prog_string_with_details = prog.to_string( diff --git a/python/paddle/fluid/tests/unittests/test_prune.py b/python/paddle/fluid/tests/unittests/test_prune.py index 2c97f49aeea573ea043a2a1433f2ee0cbcb72f26..30e3aefe0a738ca2af431aa89ec32a6db4ae5d19 100644 --- a/python/paddle/fluid/tests/unittests/test_prune.py +++ b/python/paddle/fluid/tests/unittests/test_prune.py @@ -25,8 +25,10 @@ import paddle.fluid.framework as framework class TestPrune(unittest.TestCase): def net(self): - x = fluid.layers.data(name='x', shape=[2], dtype='float32') - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + x.desc.set_need_check_feed(False) + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + label.desc.set_need_check_feed(False) y = paddle.static.nn.fc(x=[x], size=2, activation="softmax") loss = paddle.nn.functional.cross_entropy( input=y, label=label, reduction='none', use_softmax=False @@ -161,8 +163,10 @@ def _mock_guard(mock): class TestExecutorRunAutoPrune(unittest.TestCase): def net1(self): - x = fluid.layers.data(name='x', shape=[2], dtype='float32') - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + x.desc.set_need_check_feed(False) + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + label.desc.set_need_check_feed(False) w_param_attrs = fluid.ParamAttr( name="fc_weight", learning_rate=0.5, @@ -185,9 +189,12 @@ class TestExecutorRunAutoPrune(unittest.TestCase): return x, y, label, loss1, loss2, w_param_attrs def net2(self): - x1 = fluid.layers.data(name='x1', shape=[2], dtype='float32') - x2 = fluid.layers.data(name='x2', shape=[2], dtype='float32') - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + x1 = paddle.static.data(name='x1', shape=[-1, 2], dtype='float32') + x1.desc.set_need_check_feed(False) + x2 = paddle.static.data(name='x2', shape=[-1, 2], dtype='float32') + x2.desc.set_need_check_feed(False) + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + label.desc.set_need_check_feed(False) w1_param_attrs = fluid.ParamAttr( name="fc_weight1", learning_rate=0.5, diff --git a/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py b/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py index b537a31b3aae8792cebc51890788fa4576f07235..1e50e1a08c679cfdb7f070765f5f77dcca9e32b4 100644 --- a/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py +++ b/python/paddle/fluid/tests/unittests/test_pull_gpups_sparse_op.py @@ -32,8 +32,8 @@ class TestPullGpupsSparse(unittest.TestCase): slots = [] with fluid.program_guard(train_program, startup_program): - l = fluid.layers.data( - name='input', shape=[1], dtype="int64", lod_level=1 + l = paddle.static.data( + name='input', shape=[-1, 1], dtype="int64", lod_level=1 ) slots.append(l) output = _pull_gpups_sparse( diff --git a/python/paddle/fluid/tests/unittests/test_py_func_op.py b/python/paddle/fluid/tests/unittests/test_py_func_op.py index 69e363ee71863c6508f03887973fe6830c76c70b..a90e37a4755c23632e9f54a2b2dbdffbca135d6f 100644 --- a/python/paddle/fluid/tests/unittests/test_py_func_op.py +++ b/python/paddle/fluid/tests/unittests/test_py_func_op.py @@ -179,8 +179,12 @@ def test_main(use_cuda, use_py_func_op, use_parallel_executor): with fluid.scope_guard(fluid.core.Scope()): gen = paddle.seed(1) np.random.seed(1) - img = fluid.layers.data(name='image', shape=[784], dtype='float32') - 
label = fluid.layers.data(name='label', shape=[1], dtype='int64') + img = paddle.static.data( + name='image', shape=[-1, 784], dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' + ) loss = simple_fc_net(img, label, use_py_func_op) optimizer = fluid.optimizer.SGD(learning_rate=1e-3) optimizer.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_combination.py b/python/paddle/fluid/tests/unittests/test_py_reader_combination.py index e238eeaf4cb8544a4a6b19589f5d3950c6562d47..db664c55ebee7fd0c35763e5c3aea11d2961b2d1 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_combination.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_combination.py @@ -56,10 +56,12 @@ class TestPyReaderCombination(unittest.TestCase): def main_impl(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): - image = fluid.layers.data( - name='image', dtype='float32', shape=[784] + image = paddle.static.data( + name='image', dtype='float32', shape=[-1, 784] + ) + label = paddle.static.data( + name='label', dtype='int64', shape=[-1, 1] ) - label = fluid.layers.data(name='label', dtype='int64', shape=[1]) py_reader1 = fluid.io.PyReader( feed_list=[image, label], capacity=16, iterable=True diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py b/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py index d18a66b82854cb73c86a40fa48a5c0df74b1152e..ddef600b166667d618330833d31ef934cd9fc580 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_return_list.py @@ -38,8 +38,8 @@ class TestPyReader(unittest.TestCase): for return_list in [True, False]: with fluid.program_guard(fluid.Program(), fluid.Program()): - image = fluid.layers.data( - name='image', shape=[784, 784], dtype='float32' + image = paddle.static.data( + name='image', shape=[-1, 784, 784], dtype='float32' ) reader = fluid.io.PyReader( feed_list=[image], diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py b/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py index 19cd4f546acbcd395ad3126004163030632a04e6..afc81e0f2a2d837610bf57cf9a28d1df763265ac 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_sample_generator.py @@ -54,8 +54,10 @@ class TestCaseBase(unittest.TestCase): return ret def run_main(self, reader, use_sample_generator, iterable, drop_last): - image = fluid.layers.data(name='image', dtype='float32', shape=[784]) - label = fluid.layers.data(name='label', dtype='int64', shape=[1]) + image = paddle.static.data( + name='image', dtype='float32', shape=[-1, 784] + ) + label = paddle.static.data(name='label', dtype='int64', shape=[-1, 1]) py_reader = fluid.io.PyReader( feed_list=[image, label], capacity=16, diff --git a/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py b/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py index 1f5aaa6b0952b90210d28215b30e95436f7455a4..7d6d61e64c243dbdb980f786e99125158d37f05b 100644 --- a/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py +++ b/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py @@ -209,12 +209,6 @@ class TestError(unittest.TestCase): self.assertRaises(ValueError, test_axis_value_error_2) - # Test error with no valid axis - def test_axis_value_error_3(): - paddle_res = 
paddle.quantile(self.x, q=0.4, axis=[]) - - self.assertRaises(ValueError, test_axis_value_error_3) - class TestQuantileRuntime(unittest.TestCase): """ diff --git a/python/paddle/fluid/tests/unittests/test_reader_reset.py b/python/paddle/fluid/tests/unittests/test_reader_reset.py index bbdade712e111372db1a008f8032710aa357ca76..9524068316bd696b929d3b175059d609ac9317a1 100644 --- a/python/paddle/fluid/tests/unittests/test_reader_reset.py +++ b/python/paddle/fluid/tests/unittests/test_reader_reset.py @@ -46,10 +46,12 @@ class TestReaderReset(unittest.TestCase): startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): - image = fluid.layers.data( - name='image', shape=self.ins_shape, dtype='float32' + image = paddle.static.data( + name='image', shape=[-1] + self.ins_shape, dtype='float32' + ) + label = paddle.static.data( + name='label', shape=[-1, 1], dtype='int64' ) - label = fluid.layers.data(name='label', shape=[1], dtype='int64') data_reader_handle = fluid.io.PyReader( feed_list=[image, label], capacity=16, diff --git a/python/paddle/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_recurrent_op.py index db1abc9ef0ec441b1448a7fe28242d11c7225c54..4ea5ed0e0d35fdce9b11f7e3df5402d172c67ea4 100644 --- a/python/paddle/fluid/tests/unittests/test_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_recurrent_op.py @@ -135,15 +135,14 @@ class RecurrentOpTest1(unittest.TestCase): self.output = paddle.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', - append_batch_size=False, ) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot' + h_boot = paddle.static.data( + shape=[-1, self.input_dim], dtype='float32', name='h_boot' ) h_boot.stop_gradient = False @@ -281,15 +280,14 @@ class RecurrentOpTest2(RecurrentOpTest1): self.output = paddle.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', - append_batch_size=False, ) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot' + h_boot = paddle.static.data( + shape=[-1, self.input_dim], dtype='float32', name='h_boot' ) h_boot.stop_gradient = False @@ -390,25 +388,22 @@ class RecurrentOpMultipleMemoryTest(RecurrentOpTest1): self.output = paddle.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', - append_batch_size=False, ) x.stop_gradient = False - h_boot1 = layers.data( + h_boot1 = paddle.static.data( shape=[self.batch_size, self.input_dim], dtype='float32', name='h_boot1', - append_batch_size=False, ) h_boot1.stop_gradient = False - h_boot2 = layers.data( + h_boot2 = paddle.static.data( shape=[self.batch_size, self.input_dim], dtype='float32', name='h_boot2', - append_batch_size=False, ) h_boot2.stop_gradient = False @@ -477,11 +472,10 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1): self.output = paddle.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', - append_batch_size=False, ) x.stop_gradient = False @@ -582,34 +576,30 @@ class RecurrentOpSubBlockTest(RecurrentOpTest1): 
self.output = paddle.mean(rnn_out) def create_rnn_op(self): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', - append_batch_size=False, ) x.stop_gradient = False - emb = layers.data( + emb = paddle.static.data( name='emb', shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', - append_batch_size=False, ) emb.stop_gradient = False - w1 = layers.data( + w1 = paddle.static.data( shape=[self.input_dim, self.input_dim], dtype='float32', name='w1', - append_batch_size=False, ) w1.stop_gradient = False - w2 = layers.data( + w2 = paddle.static.data( shape=[self.input_dim * 2, self.input_dim], dtype='float32', name='w2', - append_batch_size=False, ) w2.stop_gradient = False @@ -675,15 +665,14 @@ class RecurrentOpStopGradientTest(RecurrentOpTest1): self.output = paddle.mean(self.create_rnn_op()) def create_rnn_op(self): - x = layers.data( + x = paddle.static.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype="float32", name="x", - append_batch_size=False, ) x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype="float32", name="h_boot" + h_boot = paddle.static.data( + shape=[-1, self.input_dim], dtype="float32", name="h_boot" ) h_boot.stop_gradient = True diff --git a/python/paddle/fluid/tests/unittests/test_reduce_op.py b/python/paddle/fluid/tests/unittests/test_reduce_op.py index b101c24e77f8a08b33028aa7efdb586cd41a14cf..026554f010d447453a77e8c957549fc370f4d643 100644 --- a/python/paddle/fluid/tests/unittests/test_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_reduce_op.py @@ -507,8 +507,8 @@ class TestAllOpError(unittest.TestCase): input1 = 12 self.assertRaises(TypeError, paddle.all, input1) # The input dtype of reduce_all_op must be bool. - input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32" + input2 = paddle.static.data( + name='input2', shape=[-1, 12, 10], dtype="int32" ) self.assertRaises(TypeError, paddle.all, input2) @@ -624,8 +624,8 @@ class TestAnyOpError(unittest.TestCase): input1 = 12 self.assertRaises(TypeError, paddle.any, input1) # The input dtype of reduce_any_op must be bool. - input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32" + input2 = paddle.static.data( + name='input2', shape=[-1, 12, 10], dtype="int32" ) self.assertRaises(TypeError, paddle.any, input2) @@ -918,7 +918,7 @@ class TestReduceSumOpError(unittest.TestCase): ) self.assertRaises(TypeError, paddle.sum, x1) # The input dtype of reduce_sum_op must be float32 or float64 or int32 or int64. 
- x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") + x2 = paddle.static.data(name='x2', shape=[-1, 4], dtype="uint8") self.assertRaises(TypeError, paddle.sum, x2) diff --git a/python/paddle/fluid/tests/unittests/test_registry.py b/python/paddle/fluid/tests/unittests/test_registry.py index 1427d0e07548aa32aa2543e8c3388cbf0126c2ba..4d40eb9965f78134f9c6683ec9ee1cc17ef4276e 100644 --- a/python/paddle/fluid/tests/unittests/test_registry.py +++ b/python/paddle/fluid/tests/unittests/test_registry.py @@ -24,7 +24,7 @@ import paddle.fluid as fluid class TestRegistry(unittest.TestCase): @prog_scope() def test_registry_layer(self): - x = fluid.layers.data(name='X', shape=[10, 10], dtype='float32') + x = paddle.static.data(name='X', shape=[-1, 10, 10], dtype='float32') output = paddle.mean(x) place = fluid.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/test_regularizer.py b/python/paddle/fluid/tests/unittests/test_regularizer.py index 5ab643819d7665ab427b5bc0bcefa69d25e2d2e5..13420553f1fe980c90f6f55a06a03dd84ed0d086 100644 --- a/python/paddle/fluid/tests/unittests/test_regularizer.py +++ b/python/paddle/fluid/tests/unittests/test_regularizer.py @@ -198,10 +198,12 @@ class TestRegularizer(unittest.TestCase): with self.scope_prog_guard( main_prog=main_prog, startup_prog=startup_prog ): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1 + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 + ) + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64" ) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost = model(data, label, self.word_len) @@ -221,10 +223,12 @@ class TestRegularizer(unittest.TestCase): with self.scope_prog_guard( main_prog=main_prog, startup_prog=startup_prog ): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1 + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 + ) + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64" ) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost_l2 = model(data, label, self.word_len) diff --git a/python/paddle/fluid/tests/unittests/test_regularizer_api.py b/python/paddle/fluid/tests/unittests/test_regularizer_api.py index a863ed45fdc17f6a2f5d1e120dbcb0c745883ff1..0d3e99a6c7f1d40f05519b520c7c7d7b855960f7 100644 --- a/python/paddle/fluid/tests/unittests/test_regularizer_api.py +++ b/python/paddle/fluid/tests/unittests/test_regularizer_api.py @@ -105,10 +105,12 @@ class TestRegularizer(unittest.TestCase): with self.scope_prog_guard( main_prog=main_prog, startup_prog=startup_prog ): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1 + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 + ) + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64" ) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost = model(data, label, self.word_len) @@ -129,10 +131,12 @@ class TestRegularizer(unittest.TestCase): with self.scope_prog_guard( main_prog=main_prog, startup_prog=startup_prog ): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1 + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 + ) + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64" ) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost_l2 = model(data, label, 
self.word_len) diff --git a/python/paddle/fluid/tests/unittests/test_renorm_op.py b/python/paddle/fluid/tests/unittests/test_renorm_op.py index 7a4cac34ecdae54d6fbfaa5942d52f1058b152e7..79dc144ee57088e35fcd539c26ea2093e02542fb 100644 --- a/python/paddle/fluid/tests/unittests/test_renorm_op.py +++ b/python/paddle/fluid/tests/unittests/test_renorm_op.py @@ -38,7 +38,6 @@ class TestRenormAPI(unittest.TestCase): # case 1: with program_guard(Program(), Program()): - # x = fluid.layers.data(name = 'x',shape=[-1, 2, 3]) x = paddle.static.data(name="x", shape=[-1, 2, 3], dtype='float64') z = paddle.renorm(x, self.p, self.dim, self.max_norm) exe = fluid.Executor(fluid.CPUPlace()) diff --git a/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py b/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py index 6b602fa74131ef3c3e49c1ff0c7edb345dee753c..90877a3047e2c9f52ce8a58a9d93675f57156d8f 100644 --- a/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py +++ b/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py @@ -120,13 +120,14 @@ class TestIndexSelectAPI(unittest.TestCase): # case 1: with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 4]) - index = fluid.layers.data( + x = paddle.static.data(name='x', shape=[-1, 4], dtype='float32') + x.desc.set_need_check_feed(False) + index = paddle.static.data( name='repeats_', shape=[4], dtype='int32', - append_batch_size=False, ) + index.desc.set_need_check_feed(False) z = paddle.repeat_interleave(x, index, axis=1) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( @@ -140,13 +141,14 @@ class TestIndexSelectAPI(unittest.TestCase): # case 2: repeats = np.array([1, 2, 1]).astype('int32') with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 4]) - index = fluid.layers.data( + x = paddle.static.data(name='x', shape=[-1, 4], dtype="float32") + x.desc.set_need_check_feed(False) + index = paddle.static.data( name='repeats_', shape=[3], dtype='int32', - append_batch_size=False, ) + index.desc.set_need_check_feed(False) z = paddle.repeat_interleave(x, index, axis=0) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( @@ -162,7 +164,8 @@ class TestIndexSelectAPI(unittest.TestCase): repeats = 2 with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 4]) + x = paddle.static.data(name='x', shape=[-1, 4], dtype='float32') + x.desc.set_need_check_feed(False) z = paddle.repeat_interleave(x, repeats, axis=0) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( @@ -173,7 +176,8 @@ class TestIndexSelectAPI(unittest.TestCase): # case 3 zero_dim: with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[]) + x = paddle.static.data(name='x', shape=[-1], dtype="float32") + x.desc.set_need_check_feed(False) z = paddle.repeat_interleave(x, repeats) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( diff --git a/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py b/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py index 277f3558d09d4f6d744b0edd1f9fc9ffcf2e4f9e..d580636ce50d7efe02454cb57b3b23c0994c7f93 100644 --- a/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py +++ b/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py @@ -135,9 +135,9 @@ class TestResnet50Accuracy(unittest.TestCase): loop_num = 10 feed = self.generate_random_data(loop_num) - core.set_prim_enabled(True) + core._set_prim_backward_enabled(True) loss_c = self.train(place, loop_num, 
feed, use_cinn=True) - core.set_prim_enabled(False) + core._set_prim_backward_enabled(False) loss_p = self.train(place, loop_num, feed, use_cinn=True) print("Losses of Composite + CINN:") print(loss_c) diff --git a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py index 01a58266aebd46c02a035e5f2369ba4036ac72b2..63b22fb9fc99834ebc9aea3c44f96e2315e3cda0 100644 --- a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py +++ b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py @@ -277,8 +277,8 @@ class TestRMSPropV2(unittest.TestCase): place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1], dtype='float32') y_predict = paddle.static.nn.fc(x, size=1) cost = paddle.nn.functional.square_error_cost( input=y_predict, label=y diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py index 9c9c5520c027d8b7444e7a7343700bce23dbc1a5..730daa1cbeb4376d5b3266c9d8151c59fc725643 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py @@ -22,7 +22,6 @@ from rnn.rnn_numpy import rnn as numpy_rnn import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers import paddle.fluid.layers.utils as utils from paddle.fluid import framework from paddle.fluid.executor import Executor @@ -42,10 +41,9 @@ class TestRnnError(unittest.TestCase): inputs = fluid.data( name='inputs', shape=[None, input_size], dtype='float32' ) - pre_hidden = layers.data( + pre_hidden = paddle.static.data( name='pre_hidden', shape=[None, hidden_size], - append_batch_size=False, dtype='float32', ) inputs_basic_lstm = fluid.data( diff --git a/python/paddle/fluid/tests/unittests/test_roll_op.py b/python/paddle/fluid/tests/unittests/test_roll_op.py index 1bb4e3392617b0b706ffb303a345e3832aefdea8..9156f728940f76b15f36915e79871a39e21635d1 100644 --- a/python/paddle/fluid/tests/unittests/test_roll_op.py +++ b/python/paddle/fluid/tests/unittests/test_roll_op.py @@ -68,7 +68,8 @@ class TestRollAPI(unittest.TestCase): paddle.enable_static() # case 1: with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 3]) + x = paddle.static.data(name='x', shape=[-1, 3], dtype='float32') + x.desc.set_need_check_feed(False) z = paddle.roll(x, shifts=1) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( @@ -81,7 +82,8 @@ class TestRollAPI(unittest.TestCase): # case 2: with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 3]) + x = paddle.static.data(name='x', shape=[-1, 3], dtype='float32') + x.desc.set_need_check_feed(False) z = paddle.roll(x, shifts=1, axis=0) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( @@ -119,7 +121,8 @@ class TestRollAPI(unittest.TestCase): def test_axis_out_range(): with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 3]) + x = paddle.static.data(name='x', shape=[-1, 3], dtype='float32') + x.desc.set_need_check_feed(False) z = paddle.roll(x, shifts=1, axis=10) exe = fluid.Executor(fluid.CPUPlace()) (res,) = exe.run( diff --git a/python/paddle/fluid/tests/unittests/test_run_program_op.py 
b/python/paddle/fluid/tests/unittests/test_run_program_op.py index 35686f843dec6f84ba4e698318404dfb40a99e94..fe012ded3993e2c861b2cb8d5281bda9ea9291e9 100644 --- a/python/paddle/fluid/tests/unittests/test_run_program_op.py +++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py @@ -460,8 +460,8 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest): def build_model(self): # 1. simple model - x = fluid.layers.data( - name=self.input_names['X'][0], shape=[5], dtype='int64' + x = paddle.static.data( + name=self.input_names['X'][0], shape=[-1, 5], dtype='int64' ) emb = fluid.input.embedding( input=x, diff --git a/python/paddle/fluid/tests/unittests/test_save_model_without_var.py b/python/paddle/fluid/tests/unittests/test_save_model_without_var.py index 1394006807029640ddbac9b003bf61ca7aa16313..97961b3df0af9bc1666098d64513256c50246b8b 100644 --- a/python/paddle/fluid/tests/unittests/test_save_model_without_var.py +++ b/python/paddle/fluid/tests/unittests/test_save_model_without_var.py @@ -15,14 +15,13 @@ import unittest import warnings +import paddle import paddle.fluid as fluid class TestSaveModelWithoutVar(unittest.TestCase): def test_no_var_save(self): - data = fluid.layers.data( - name='data', shape=[-1, 1], dtype='float32', append_batch_size=False - ) + data = paddle.static.data(name='data', shape=[-1, 1], dtype='float32') data_plus = data + 1 if fluid.core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_scale_op.py b/python/paddle/fluid/tests/unittests/test_scale_op.py index 3fc7aedf0221c2408445d0c3a89f082626b9a749..f66ff39f1087248784d2bd941e930bc5afc6bf1d 100644 --- a/python/paddle/fluid/tests/unittests/test_scale_op.py +++ b/python/paddle/fluid/tests/unittests/test_scale_op.py @@ -22,7 +22,6 @@ from op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers from paddle.fluid.op import Operator from paddle.static import Program, program_guard @@ -247,7 +246,7 @@ class TestScaleDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 3], False, dtype) + data = paddle.static.data('data', [2, 3], dtype) data.persistable = True out = paddle.scale(data, 2.0) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -278,7 +277,7 @@ class TestScaleTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 3], False, dtype) + data = paddle.static.data('data', [2, 3], dtype) data.persistable = True out = paddle.scale(data, 2.0) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py b/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py index b0c58839b3df22cbf267c06f606d8115aae9c63a..5c5009984ee5b992e9b85f2cb322db11f7e9f9f2 100644 --- a/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py +++ b/python/paddle/fluid/tests/unittests/test_scatter_nd_op.py @@ -167,44 +167,38 @@ class TestScatterNdOpAPI(unittest.TestCase): """ def testcase1(self): - ref1 = fluid.layers.data( + ref1 = paddle.static.data( name='ref1', shape=[10, 9, 8, 1, 3], dtype='float32', - append_batch_size=False, ) - index1 = fluid.layers.data( + index1 = paddle.static.data( name='index1', shape=[5, 5, 8, 5], dtype='int32', - append_batch_size=False, ) - updates1 = fluid.layers.data( + updates1 = paddle.static.data( name='update1', shape=[5, 5, 8], dtype='float32', - append_batch_size=False, ) output1 = 
paddle.scatter_nd_add(ref1, index1, updates1) def testcase2(self): - ref2 = fluid.layers.data( + ref2 = paddle.static.data( name='ref2', shape=[10, 9, 8, 1, 3], dtype='double', - append_batch_size=False, ) - index2 = fluid.layers.data( + index2 = paddle.static.data( name='index2', shape=[5, 8, 5], dtype='int32', - append_batch_size=False, ) - updates2 = fluid.layers.data( + updates2 = paddle.static.data( name='update2', shape=[5, 8], dtype='double', - append_batch_size=False, ) output2 = paddle.scatter_nd_add( ref2, index2, updates2, name="scatter_nd_add" @@ -212,33 +206,29 @@ class TestScatterNdOpAPI(unittest.TestCase): def testcase3(self): shape3 = [10, 9, 8, 1, 3] - index3 = fluid.layers.data( + index3 = paddle.static.data( name='index3', shape=[5, 5, 8, 5], dtype='int32', - append_batch_size=False, ) - updates3 = fluid.layers.data( + updates3 = paddle.static.data( name='update3', shape=[5, 5, 8], dtype='float32', - append_batch_size=False, ) output3 = paddle.scatter_nd(index3, updates3, shape3) def testcase4(self): shape4 = [10, 9, 8, 1, 3] - index4 = fluid.layers.data( + index4 = paddle.static.data( name='index4', shape=[5, 5, 8, 5], dtype='int32', - append_batch_size=False, ) - updates4 = fluid.layers.data( + updates4 = paddle.static.data( name='update4', shape=[5, 5, 8], dtype='double', - append_batch_size=False, ) output4 = paddle.scatter_nd(index4, updates4, shape4, name='scatter_nd') @@ -298,14 +288,14 @@ class TestScatterNdOpRaise(unittest.TestCase): def test_check_raise(self): def check_raise_is_test(): try: - ref5 = fluid.layers.data( - name='ref5', shape=[3, 4, 5], dtype='float32' + ref5 = paddle.static.data( + name='ref5', shape=[-1, 3, 4, 5], dtype='float32' ) - index5 = fluid.layers.data( - name='index5', shape=[2, 10], dtype='int32' + index5 = paddle.static.data( + name='index5', shape=[-1, 2, 10], dtype='int32' ) - updates5 = fluid.layers.data( - name='updates5', shape=[2, 10], dtype='float32' + updates5 = paddle.static.data( + name='updates5', shape=[-1, 2, 10], dtype='float32' ) output5 = paddle.scatter_nd_add(ref5, index5, updates5) except Exception as e: @@ -317,23 +307,20 @@ class TestScatterNdOpRaise(unittest.TestCase): def test_check_raise2(self): with self.assertRaises(ValueError): - ref6 = fluid.layers.data( + ref6 = paddle.static.data( name='ref6', shape=[10, 9, 8, 1, 3], dtype='double', - append_batch_size=False, ) - index6 = fluid.layers.data( + index6 = paddle.static.data( name='index6', shape=[5, 8, 5], dtype='int32', - append_batch_size=False, ) - updates6 = fluid.layers.data( + updates6 = paddle.static.data( name='update6', shape=[5, 8], dtype='float32', - append_batch_size=False, ) output6 = paddle.scatter_nd_add(ref6, index6, updates6) @@ -341,11 +328,11 @@ class TestScatterNdOpRaise(unittest.TestCase): def check_raise_is_test(): try: shape = [3, 4, 5] - index7 = fluid.layers.data( - name='index7', shape=[2, 1], dtype='int32' + index7 = paddle.static.data( + name='index7', shape=[-1, 2, 1], dtype='int32' ) - updates7 = fluid.layers.data( - name='updates7', shape=[2, 4, 5, 20], dtype='float32' + updates7 = paddle.static.data( + name='updates7', shape=[-1, 2, 4, 5, 20], dtype='float32' ) output7 = paddle.scatter_nd(index7, updates7, shape) except Exception as e: diff --git a/python/paddle/fluid/tests/unittests/test_select_input_output_op.py b/python/paddle/fluid/tests/unittests/test_select_input_output_op.py index c294e5f6e50bff9aeed8180683b19123d8dcc1d1..d92688d108bd1cfb24a38c0ccd7f994fb0a3a296 100644 --- 
a/python/paddle/fluid/tests/unittests/test_select_input_output_op.py +++ b/python/paddle/fluid/tests/unittests/test_select_input_output_op.py @@ -19,7 +19,6 @@ import numpy as np import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers from paddle.fluid.backward import append_backward from paddle.fluid.executor import Executor from paddle.fluid.framework import Program, program_guard @@ -33,9 +32,11 @@ class TestSplitMergeSelectedVarOps(unittest.TestCase): for branch_num in range(2, 10): program = Program() with program_guard(program): - x = layers.data(name='x', shape=[2], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') x.stop_gradient = False # For test gradient - mask = layers.data(name='mask', shape=[1], dtype='int32') + mask = paddle.static.data( + name='mask', shape=[-1, 1], dtype='int32' + ) outputs = [] for i in range(branch_num): @@ -78,8 +79,8 @@ class TestSplitMergeSelectedVarOps(unittest.TestCase): class TestSelectInputOpError(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): - mask = layers.data(name='mask', shape=[1], dtype='int32') - in1 = layers.data(name='in1', shape=[1], dtype='int32') + mask = paddle.static.data(name='mask', shape=[-1, 1], dtype='int32') + in1 = paddle.static.data(name='in1', shape=[-1, 1], dtype='int32') # 1. The type of inputs in select_input must be list or tuple. def test_inputs_type(): @@ -95,7 +96,9 @@ class TestSelectInputOpError(unittest.TestCase): # 3. The dtype of mask in select_input must be int32 or int64. def test_mask_dtype(): - mask = layers.data(name='mask2', shape=[1], dtype='float32') + mask = paddle.static.data( + name='mask2', shape=[-1, 1], dtype='float32' + ) select_input([in1], mask) self.assertRaises(TypeError, test_mask_dtype) @@ -105,14 +108,14 @@ class TestSelectOutput_Error(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): - in1 = layers.data(name='in1', shape=[1], dtype='int32') - mask_int32 = layers.data( - name='mask_int32', shape=[1], dtype='int32' + in1 = paddle.static.data(name='in1', shape=[-1, 1], dtype='int32') + mask_int32 = paddle.static.data( + name='mask_int32', shape=[-1, 1], dtype='int32' ) - mask_float32 = layers.data( - name='mask_float32', shape=[1], dtype='float32' + mask_float32 = paddle.static.data( + name='mask_float32', shape=[-1, 1], dtype='float32' ) - out1 = layers.data(name='out1', shape=[1], dtype='int32') + out1 = paddle.static.data(name='out1', shape=[-1, 1], dtype='int32') # 1. The type of input in select_output must Variable. 
def test_input_type(): diff --git a/python/paddle/fluid/tests/unittests/test_set_bool_attr.py b/python/paddle/fluid/tests/unittests/test_set_bool_attr.py index da02e4621d0c23383b56e93899117e86eeaf7647..c599f08ae2bf99b7747a007ec0fdb6aadfc81b0a 100644 --- a/python/paddle/fluid/tests/unittests/test_set_bool_attr.py +++ b/python/paddle/fluid/tests/unittests/test_set_bool_attr.py @@ -21,7 +21,9 @@ import paddle.fluid as fluid class TestAttrSet(unittest.TestCase): def test_set_bool_attr(self): - x = fluid.layers.data(name='x', shape=[3, 7, 3, 7], dtype='float32') + x = paddle.static.data( + name='x', shape=[-1, 3, 7, 3, 7], dtype='float32' + ) param_attr = fluid.ParamAttr( name='batch_norm_w', initializer=fluid.initializer.Constant(value=1.0), diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py index 81b8c1b2b164374ffff6a62e438909ecac53c8f8..89515c931c2504c8fc2d915308f3dc9f3b069bdc 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py @@ -334,9 +334,13 @@ class TestSGDOpBF16API(unittest.TestCase): place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='X', shape=self.ids_shape, dtype='int64') - label = fluid.layers.data( - name='Y', shape=self.y_shape, dtype='uint16' + ids_shape = list(self.ids_shape) + x = paddle.static.data( + name='X', shape=[-1] + ids_shape, dtype='int64' + ) + y_shape = list(self.y_shape) + label = paddle.static.data( + name='Y', shape=[-1] + y_shape, dtype='uint16' ) emb = fluid.layers.embedding( input=x, diff --git a/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py b/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py index ce18a4647966d37eb7953742d48581d8d1afe2f6..de6e6bd725aea6c94b70f192be1a93dffb76a749 100644 --- a/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py +++ b/python/paddle/fluid/tests/unittests/test_sigmoid_cross_entropy_with_logits_op.py @@ -286,11 +286,11 @@ class TestSigmoidCrossEntropyWithNorm2(OpTest): def test_dtype(): # the input dtype of sigmoid_cross_entropy_with_logits must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="int32" + x2 = paddle.static.data( + name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" ) - lab2 = fluid.layers.data( - name='lab2', shape=[3, 4, 5, 6], dtype="int32" + lab2 = paddle.static.data( + name='lab2', shape=[-1, 3, 4, 5, 6], dtype="int32" ) paddle.nn.functional.binary_cross_entropy_with_logits( x2, lab2 diff --git a/python/paddle/fluid/tests/unittests/test_sign_op.py b/python/paddle/fluid/tests/unittests/test_sign_op.py index ca6ca55c91f9713c56bc76bb4ca20a292e278bf6..7834736260d9dabd7b769f29f8416bcb6384b43e 100644 --- a/python/paddle/fluid/tests/unittests/test_sign_op.py +++ b/python/paddle/fluid/tests/unittests/test_sign_op.py @@ -22,7 +22,6 @@ from op_test import OpTest import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard @@ -48,16 +47,16 @@ class TestSignOpError(unittest.TestCase): input1 = 12 self.assertRaises(TypeError, paddle.sign, input1) # The input dtype of sign_op must be float16, float32, float64. 
- input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32" + input2 = paddle.static.data( + name='input2', shape=[-1, 12, 10], dtype="int32" ) - input3 = fluid.layers.data( - name='input3', shape=[12, 10], dtype="int64" + input3 = paddle.static.data( + name='input3', shape=[-1, 12, 10], dtype="int64" ) self.assertRaises(TypeError, paddle.sign, input2) self.assertRaises(TypeError, paddle.sign, input3) - input4 = fluid.layers.data( - name='input4', shape=[4], dtype="float16" + input4 = paddle.static.data( + name='input4', shape=[-1, 4], dtype="float16" ) paddle.sign(input4) @@ -78,16 +77,16 @@ class TestSignAPI(unittest.TestCase): input1 = 12 self.assertRaises(TypeError, paddle.tensor.math.sign, input1) # The input dtype of sign_op must be float16, float32, float64. - input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32" + input2 = paddle.static.data( + name='input2', shape=[-1, 12, 10], dtype="int32" ) - input3 = fluid.layers.data( - name='input3', shape=[12, 10], dtype="int64" + input3 = paddle.static.data( + name='input3', shape=[-1, 12, 10], dtype="int64" ) self.assertRaises(TypeError, paddle.tensor.math.sign, input2) self.assertRaises(TypeError, paddle.tensor.math.sign, input3) - input4 = fluid.layers.data( - name='input4', shape=[4], dtype="float16" + input4 = paddle.static.data( + name='input4', shape=[-1, 4], dtype="float16" ) paddle.sign(input4) @@ -102,7 +101,7 @@ class TestSignDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [1, 4], False, dtype) + data = paddle.static.data('data', [1, 4], dtype) data.persistable = True out = paddle.sign(data) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -133,7 +132,7 @@ class TestSignTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [1, 4], False, dtype) + data = paddle.static.data('data', [1, 4], dtype) data.persistable = True out = paddle.sign(data) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py index 12838b218b43eadb6c2d8e45cfde4c7094a33a2d..19aa669badf5c48bfc0720c1a053fd6f5bd50bde 100644 --- a/python/paddle/fluid/tests/unittests/test_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_slice_op.py @@ -22,7 +22,6 @@ from op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers from paddle.tensor.manipulation import tensor_array_to_tensor paddle.enable_static() @@ -551,17 +550,15 @@ class TestSliceAPI(unittest.TestCase): input = np.random.random([3, 4, 5, 6]).astype("float64") minus_1 = fluid.layers.fill_constant([1], "int32", -1) minus_3 = fluid.layers.fill_constant([1], "int64", -3) - starts = fluid.layers.data( - name='starts', shape=[1, 3], append_batch_size=False + starts = paddle.static.data( + name='starts', shape=[1, 3], dtype="float32" ) - ends = fluid.layers.data( - name='ends', shape=[3], append_batch_size=False - ) - - x = fluid.layers.data( + starts.desc.set_need_check_feed(False) + ends = paddle.static.data(name='ends', shape=[3], dtype="float32") + ends.desc.set_need_check_feed(False) + x = paddle.static.data( name="x", shape=[3, 4, 5, 6], - append_batch_size=False, dtype="float64", ) @@ -885,7 +882,7 @@ class TestSliceDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [4, 5, 6], False, dtype) + 
data = paddle.static.data('data', [4, 5, 6], dtype) data.persistable = True out = paddle.slice( data, axes=[0, 1, 2], starts=[-3, 0, 2], ends=[3, 2, 4] @@ -920,7 +917,7 @@ class TestSliceTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [4, 5, 6], False, dtype) + data = paddle.static.data('data', [4, 5, 6], dtype) data.persistable = True out = paddle.slice( data, axes=[0, 1, 2], starts=[-3, 0, 2], ends=[3, 2, 4] diff --git a/python/paddle/fluid/tests/unittests/test_split_op.py b/python/paddle/fluid/tests/unittests/test_split_op.py index 943861ab2f57b3c06d3eaf18a5f530d08648109e..40e7bff55e0bcc85f97c4f658bf91433ed083e07 100644 --- a/python/paddle/fluid/tests/unittests/test_split_op.py +++ b/python/paddle/fluid/tests/unittests/test_split_op.py @@ -304,34 +304,46 @@ class TestSplitOpError(unittest.TestCase): with program_guard(Program(), Program()): # The type of axis in split_op should be int or Variable. def test_axis_type(): - x6 = fluid.layers.data(shape=[4], dtype='float16', name='x3') + x6 = paddle.static.data( + shape=[-1, 4], dtype='float16', name='x3' + ) paddle.split(x=x6, num_or_sections=2, axis=3.2) self.assertRaises(TypeError, test_axis_type) # The type of axis in split_op should be int or Variable. def test_axis_variable_type(): - x9 = fluid.layers.data(shape=[4], dtype='float16', name='x9') - x10 = fluid.layers.data(shape=[1], dtype='float16', name='x10') + x9 = paddle.static.data( + shape=[-1, 4], dtype='float16', name='x9' + ) + x10 = paddle.static.data( + shape=[-1, 1], dtype='float16', name='x10' + ) paddle.split(x=x9, num_or_sections=2, axis=x10) self.assertRaises(TypeError, test_axis_variable_type) # The type of num_or_sections in split_op should be int, tuple or list. def test_num_or_sections_type(): - x6 = fluid.layers.data(shape=[4], dtype='float16', name='x4') + x6 = paddle.static.data( + shape=[-1, 4], dtype='float16', name='x4' + ) paddle.split(x=x6, num_or_sections=2.1, axis=3) self.assertRaises(TypeError, test_num_or_sections_type) def test_num_or_sections_type_tensor(): - x7 = fluid.layers.data(shape=[4], dtype='float16', name='x5') + x7 = paddle.static.data( + shape=[-1, 4], dtype='float16', name='x5' + ) paddle.split(input=x7, num_or_sections=2.1, dim=3) self.assertRaises(TypeError, test_num_or_sections_type_tensor) def test_axis_type_tensor(): - x8 = fluid.layers.data(shape=[4], dtype='float16', name='x6') + x8 = paddle.static.data( + shape=[-1, 4], dtype='float16', name='x6' + ) paddle.split(input=x8, num_or_sections=2, dim=3.2) self.assertRaises(TypeError, test_axis_type_tensor) @@ -340,8 +352,12 @@ class TestSplitOpError(unittest.TestCase): class API_TestSplit(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.layers.data('data1', shape=[4, 6, 6], dtype='float64') - data2 = fluid.layers.data('data2', shape=[1], dtype='int32') + data1 = paddle.static.data( + 'data1', shape=[-1, 4, 6, 6], dtype='float64' + ) + data1.desc.set_need_check_feed(False) + data2 = paddle.static.data('data2', shape=[-1, 1], dtype='int32') + data2.desc.set_need_check_feed(False) x0, x1, x2 = paddle.split(data1, num_or_sections=3, axis=data2) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -359,7 +375,10 @@ class API_TestSplit(unittest.TestCase): class API_TestSplit2(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.layers.data('data1', shape=[4, 6, 6], dtype='float64') + data1 = paddle.static.data( + 
'data1', shape=[-1, 4, 6, 6], dtype='float64' + ) + data1.desc.set_need_check_feed(False) x0, x1, x2 = paddle.split(data1, num_or_sections=3, axis=2) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -378,7 +397,7 @@ class API_TestSplit2(unittest.TestCase): class API_TestSplit3(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.layers.data('data', shape=[-1, 10], dtype='float64') + data = paddle.static.data('data', shape=[-1, 10], dtype='float64') x0, x1 = paddle.split(data, num_or_sections=(3, 7), axis=1) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -392,8 +411,8 @@ class API_TestSplit3(unittest.TestCase): class API_TestSplit4(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.layers.data('data', shape=[-1, 10], dtype='float64') - index = fluid.layers.data('index', shape=[1], dtype='int32') + data = paddle.static.data('data', shape=[-1, 10], dtype='float64') + index = paddle.static.data('index', shape=[1], dtype='int32') x0, x1 = paddle.split(data, num_or_sections=(3, index), axis=1) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -430,7 +449,7 @@ class API_TestSplit5(unittest.TestCase): class API_TestSplit6(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.layers.data('data', shape=[-1, 10], dtype='float64') + data = paddle.static.data('data', shape=[-1, 10], dtype='float64') x0, x1 = paddle.split(data, num_or_sections=[1, 1], axis=0) place = fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_squeeze2_op.py b/python/paddle/fluid/tests/unittests/test_squeeze2_op.py index b8374da08727aaa49bac1e5264a354c2321bfb15..166864bd5e3df31eb90dd010f109af10a6fbd73f 100755 --- a/python/paddle/fluid/tests/unittests/test_squeeze2_op.py +++ b/python/paddle/fluid/tests/unittests/test_squeeze2_op.py @@ -155,5 +155,37 @@ class TestSqueeze2AxesTensorList(UnittestBase): self.assertEqual(infer_out.shape, (2, 3, 10)) +# test api +class TestSqueezeAPI(unittest.TestCase): + def setUp(self): + self.executed_api() + + def executed_api(self): + self.squeeze = paddle.squeeze + + def test_api(self): + paddle.disable_static() + input_data = np.random.random([3, 2, 1]).astype("float32") + x = paddle.to_tensor(input_data) + out = self.squeeze(x, axis=2) + out.backward() + + self.assertEqual(out.shape, [3, 2]) + + paddle.enable_static() + + def test_error(self): + def test_axes_type(): + x2 = paddle.static.data(name="x2", shape=[2, 1, 25], dtype="int32") + self.squeeze(x2, axis=2.1) + + self.assertRaises(TypeError, test_axes_type) + + +class TestSqueezeInplaceAPI(TestSqueezeAPI): + def executed_api(self): + self.squeeze = paddle.squeeze_ + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_squeeze_op.py b/python/paddle/fluid/tests/unittests/test_squeeze_op.py index b60152a51440a3e5bbc8628262a26298fb815a14..ae3b67a2f19d1390fe449b2a125763c24ef72587 100755 --- a/python/paddle/fluid/tests/unittests/test_squeeze_op.py +++ b/python/paddle/fluid/tests/unittests/test_squeeze_op.py @@ -22,7 +22,6 @@ from op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard paddle.enable_static() @@ -228,7 +227,7 @@ class TestSqueezeDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = 
np.float32 - data = layers.data('data', [2, 3], False, dtype) + data = paddle.static.data('data', [2, 3], dtype) data.persistable = True out = paddle.squeeze(data) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -259,7 +258,7 @@ class TestSqueezeTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 3], False, dtype) + data = paddle.static.data('data', [2, 3], dtype) data.persistable = True out = paddle.squeeze(data) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_stack_op.py b/python/paddle/fluid/tests/unittests/test_stack_op.py index 9e84268a5fe4850a6fcb1ca56050c16f8e2e63d9..496877dc55c367a2be14b3eddfd5050e8dcd8e28 100644 --- a/python/paddle/fluid/tests/unittests/test_stack_op.py +++ b/python/paddle/fluid/tests/unittests/test_stack_op.py @@ -226,9 +226,9 @@ class TestTensorStackAPIWithLoDTensorArray(unittest.TestCase): class API_test(unittest.TestCase): def test_out(self): with fluid.program_guard(fluid.Program(), fluid.Program()): - data1 = fluid.layers.data('data1', shape=[1, 2], dtype='float64') - data2 = fluid.layers.data('data2', shape=[1, 2], dtype='float64') - data3 = fluid.layers.data('data3', shape=[1, 2], dtype='float64') + data1 = paddle.static.data('data1', shape=[1, 2], dtype='float64') + data2 = paddle.static.data('data2', shape=[1, 2], dtype='float64') + data3 = paddle.static.data('data3', shape=[1, 2], dtype='float64') result_stack = paddle.stack([data1, data2, data3], axis=0) place = fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index e3309e18a4870c4cda40c76e4b2cf2a51569b0b2..d043e3785c4984fd3a1fdfd70a40109dc8bb58ca 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -281,16 +281,20 @@ class TestSaveLoadBase(unittest.TestCase): place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell = paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell @@ -406,16 +410,20 @@ class TestSaveLoadPartial(unittest.TestCase): place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", 
shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell = paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell @@ -544,16 +552,20 @@ class TestSaveLoadSetStateDict(unittest.TestCase): place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell = paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell @@ -665,16 +677,20 @@ class TestProgramStatePartial(unittest.TestCase): place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell = paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell @@ -992,16 +1008,20 @@ class TestLoadFromOldInterface(unittest.TestCase): place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell = paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell @@ -1131,17 +1151,20 @@ class TestLoadFromOldInterface(unittest.TestCase): place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( 
name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell = paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) - + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell ) @@ -1271,16 +1294,20 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell = paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell @@ -1462,16 +1489,20 @@ class TestProgramStateOldSave(unittest.TestCase): place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell = paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell @@ -1634,16 +1665,20 @@ class TestProgramStateOldSaveSingleModel(unittest.TestCase): place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell 
= paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell @@ -1769,6 +1804,7 @@ class TestStaticSaveLoadPickle(unittest.TestCase): shape=[None, 10], dtype='float32', ) + x.desc.set_need_check_feed(False) z = paddle.static.nn.fc(x, 10, bias_attr=False) place = paddle.CPUPlace() exe = paddle.static.Executor(place) @@ -1838,6 +1874,7 @@ class TestSaveLoadInferenceModel(unittest.TestCase): main_program = framework.Program() with framework.program_guard(main_program): x = paddle.static.data(name="x", shape=[10, 10], dtype='float32') + x.desc.set_need_check_feed(False) y = x + x place = paddle.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py b/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py index 5a13133bf79f2a480e86762b1e2dfa11a1037690..024f31fdf92cc80c434786d8826108010834212c 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py @@ -65,16 +65,20 @@ class TestSaveLoadBF16(unittest.TestCase): place = self.set_place() exe = fluid.Executor(place) sgd = SGDOptimizer(learning_rate=1e-3) - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[-1, num_steps], dtype='int64' ) - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data(name="y", shape=[-1, 1], dtype='float32') + y.desc.set_need_check_feed(False) + init_hidden = paddle.static.data( + name="init_hidden", shape=[-1, 1], dtype='float32' ) - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32' + init_hidden.desc.set_need_check_feed(False) + init_cell = paddle.static.data( + name="init_cell", shape=[-1, 1], dtype='float32' ) + init_cell.desc.set_need_check_feed(False) static_loss, static_last_hidden, static_last_cell = ptb_model( x, y, init_hidden, init_cell diff --git a/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py b/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py index 5ba3bcbbc110866da51f61f5b2a6ccd3398a936d..9e2391233ce60163e664207b16441d3943978c0c 100644 --- a/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_static_shape_inferrence_for_shape_tensor.py @@ -21,9 +21,7 @@ from paddle.fluid.layers.utils import try_set_static_shape_tensor class StaticShapeInferrenceTest(unittest.TestCase): def test_static_graph(self): paddle.enable_static() - data = paddle.fluid.layers.data( - name="x", shape=[-1, 2], dtype='float32' - ) + data = paddle.static.data(name="x", shape=[-1, 2], dtype='float32') shape = paddle.shape(data) # shape should be [-1, 2] x = paddle.uniform(shape) try_set_static_shape_tensor(x, shape) diff --git a/python/paddle/fluid/tests/unittests/test_strided_slice_op.py b/python/paddle/fluid/tests/unittests/test_strided_slice_op.py index 996b0c4a33338f8879c81f67af05cd38ec6d64e6..05a2631d775d0f4767d3be2d857b6a25c627dc1a 100644 --- a/python/paddle/fluid/tests/unittests/test_strided_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_strided_slice_op.py @@ -550,20 +550,13 @@ class TestStridedSliceAPI(unittest.TestCase): input = np.random.random([3, 4, 5, 6]).astype("float64") minus_1 = 
fluid.layers.fill_constant([1], "int32", -1) minus_3 = fluid.layers.fill_constant([1], "int32", -3) - starts = fluid.layers.data( - name='starts', shape=[3], dtype='int32', append_batch_size=False - ) - ends = fluid.layers.data( - name='ends', shape=[3], dtype='int32', append_batch_size=False - ) - strides = fluid.layers.data( - name='strides', shape=[3], dtype='int32', append_batch_size=False - ) + starts = paddle.static.data(name='starts', shape=[3], dtype='int32') + ends = paddle.static.data(name='ends', shape=[3], dtype='int32') + strides = paddle.static.data(name='strides', shape=[3], dtype='int32') - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[3, 4, 5, 6], - append_batch_size=False, dtype="float64", ) out_1 = paddle.strided_slice( diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index 22b9dc573fb93aa97df5c1aed8ea6a13b9d7b2c6..6e9ff86cb8b7f83ad52c747375dd285ae989b2bc 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -23,7 +23,6 @@ from decorator_helper import prog_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers import paddle.inference as paddle_infer from paddle import enable_static from paddle.fluid.op import Operator @@ -603,9 +602,9 @@ class TestAddNDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data1 = layers.data('data1', [3, 4, 5], False, dtype) + data1 = paddle.static.data('data1', [3, 4, 5], dtype) data1.persistable = True - data2 = layers.data('data2', [3, 4, 5], False, dtype) + data2 = paddle.static.data('data2', [3, 4, 5], dtype) data2.persistable = True out = paddle.add_n([data1, data2]) data1_arr = np.random.uniform(-1, 1, data1.shape).astype(dtype) @@ -645,9 +644,9 @@ class TestAddNTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data1 = layers.data('data1', [3, 4, 5], False, dtype) + data1 = paddle.static.data('data1', [3, 4, 5], dtype) data1.persistable = True - data2 = layers.data('data2', [3, 4, 5], False, dtype) + data2 = paddle.static.data('data2', [3, 4, 5], dtype) data2.persistable = True out = paddle.add_n([data1, data2]) data1_arr = np.random.uniform(-1, 1, data1.shape).astype(dtype) @@ -687,7 +686,7 @@ class TestSumDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 4], False, dtype) + data = paddle.static.data('data', [2, 4], dtype) data.persistable = True out = paddle.sum(data, axis=1, keepdim=True) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -718,7 +717,7 @@ class TestSumTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 4], False, dtype) + data = paddle.static.data('data', [2, 4], dtype) data.persistable = True out = paddle.sum(data, axis=1, keepdim=True) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py index 1b8f912bb508b1658f2c01a68c1e9d3a7a72afa8..14cebe7e6461f56e23719aca0ca0e2964a0480c1 100644 --- a/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py @@ -67,12 +67,12 @@ class TestSyncBatchNormOpTraining(unittest.TestCase): use_cudnn = self.dtype == np.float16 with fluid.unique_name.guard(): with fluid.program_guard(main, startup): - 
data = fluid.layers.data( + data = paddle.static.data( name='input', shape=self.dshape, dtype=self.dtype, - append_batch_size=False, ) + data.desc.set_need_check_feed(False) conv = paddle.static.nn.conv2d( input=data, num_filters=32, @@ -248,7 +248,10 @@ class TestDygraphSyncBatchNormAPIError(unittest.TestCase): # the input dtype of SyncBatchNorm must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") + x2 = paddle.static.data( + name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" + ) + x2.desc.set_need_check_feed(False) self.assertRaises(TypeError, my_sync_batch_norm, x2) diff --git a/python/paddle/fluid/tests/unittests/test_tdm_child_op.py b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py index 5d261dd1efb0b2e335fe1f559395a6ffdfd569fe..2481a48f01793cdeb8ef30b528589447fe76f6fd 100644 --- a/python/paddle/fluid/tests/unittests/test_tdm_child_op.py +++ b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py @@ -17,6 +17,7 @@ import unittest import numpy as np from op_test import OpTest +import paddle import paddle.fluid as fluid @@ -139,7 +140,9 @@ class TestCase4(TestTDMChildOp): class TestTDMChildShape(unittest.TestCase): def test_shape(self): - x = fluid.layers.data(name='x', shape=[1], dtype='int32', lod_level=1) + x = paddle.static.data( + name='x', shape=[-1, 1], dtype='int32', lod_level=1 + ) tdm_tree_info = create_tdm_tree() tree_info_np = np.array(tdm_tree_info).astype('int32') diff --git a/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py b/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py index 7a649a8c238dbea509a36f40044af3ebcab6792d..217d84b4b9f8afc2d9a52adec74f50ab3e07e6d5 100644 --- a/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py +++ b/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py @@ -18,6 +18,7 @@ import unittest import numpy as np from op_test import OpTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core @@ -266,7 +267,9 @@ class TestCase7(TestTDMSamplerOp): class TestTDMSamplerShape(unittest.TestCase): def test_shape(self): - x = fluid.layers.data(name='x', shape=[1], dtype='int32', lod_level=1) + x = paddle.static.data( + name='x', shape=[-1, 1], dtype='int32', lod_level=1 + ) tdm_tree_travel = create_tdm_travel() tdm_tree_layer = create_tdm_layer() layer_node_num_list = [len(i) for i in tdm_tree_layer] diff --git a/python/paddle/fluid/tests/unittests/test_tile_op.py b/python/paddle/fluid/tests/unittests/test_tile_op.py index 419c142b6df5ff54909d62e649471741b48666bd..52562a6eb6f99596935069f83f793802bae6b4b8 100644 --- a/python/paddle/fluid/tests/unittests/test_tile_op.py +++ b/python/paddle/fluid/tests/unittests/test_tile_op.py @@ -17,11 +17,10 @@ import unittest import gradient_checker import numpy as np from decorator_helper import prog_scope -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import Program, core, program_guard @@ -29,6 +28,7 @@ from paddle.fluid import Program, core, program_guard class TestTileOpRank1(OpTest): def setUp(self): self.op_type = "tile" + self.python_api = paddle.tile self.init_data() self.inputs = {'X': np.random.random(self.ori_shape).astype("float64")} @@ -106,6 +106,7 @@ class TestTileOpRank4(TestTileOpRank1): class TestTileOpRank1_tensor_attr(OpTest): def setUp(self): self.op_type = "tile" + self.python_api = paddle.tile self.init_data() repeat_times_tensor 
= [] for index, ele in enumerate(self.repeat_times): @@ -151,6 +152,7 @@ class TestTileOpRank2_attr_tensor(TestTileOpRank1_tensor_attr): class TestTileOpRank1_tensor(OpTest): def setUp(self): self.op_type = "tile" + self.python_api = paddle.tile self.init_data() self.inputs = { @@ -182,6 +184,7 @@ class TestTileOpRank2_tensor(TestTileOpRank1_tensor): class TestTileOpInteger(OpTest): def setUp(self): self.op_type = "tile" + self.python_api = paddle.tile self.inputs = { 'X': np.random.randint(10, size=(4, 4, 5)).astype("int32") } @@ -197,6 +200,7 @@ class TestTileOpInteger(OpTest): class TestTileOpBoolean(OpTest): def setUp(self): self.op_type = "tile" + self.python_api = paddle.tile self.inputs = {'X': np.random.randint(2, size=(2, 4, 5)).astype("bool")} self.attrs = {'repeat_times': [2, 1, 4]} output = np.tile(self.inputs['X'], (2, 1, 4)) @@ -210,6 +214,7 @@ class TestTileOpBoolean(OpTest): class TestTileOpInt64_t(OpTest): def setUp(self): self.op_type = "tile" + self.python_api = paddle.tile self.inputs = { 'X': np.random.randint(10, size=(2, 4, 5)).astype("int64") } @@ -229,9 +234,9 @@ class TestTileError(unittest.TestCase): ) repeat_times = [2, 2] self.assertRaises(TypeError, paddle.tile, x1, repeat_times) - x2 = fluid.layers.data(name='x2', shape=[4], dtype="uint8") + x2 = paddle.static.data(name='x2', shape=[-1, 4], dtype="uint8") self.assertRaises(TypeError, paddle.tile, x2, repeat_times) - x3 = fluid.layers.data(name='x3', shape=[4], dtype="bool") + x3 = paddle.static.data(name='x3', shape=[-1, 4], dtype="bool") x3.stop_gradient = False self.assertRaises(ValueError, paddle.tile, x3, repeat_times) @@ -240,7 +245,7 @@ class TestTileAPIStatic(unittest.TestCase): def test_api(self): with program_guard(Program(), Program()): repeat_times = [2, 2] - x1 = fluid.layers.data(name='x1', shape=[4], dtype="int32") + x1 = paddle.static.data(name='x1', shape=[-1, 4], dtype="int32") out = paddle.tile(x1, repeat_times) positive_2 = fluid.layers.fill_constant([1], dtype="int32", value=2) out2 = paddle.tile(x1, repeat_times=[positive_2, 2]) @@ -278,7 +283,7 @@ class TestTileDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [1, 2], False, dtype) + data = paddle.static.data('data', [1, 2], dtype) data.persistable = True out = paddle.tile(data, [2, 1]) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -309,7 +314,7 @@ class TestTileTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [1, 2], False, dtype) + data = paddle.static.data('data', [1, 2], dtype) data.persistable = True out = paddle.tile(data, [2, 1]) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_trainable.py b/python/paddle/fluid/tests/unittests/test_trainable.py index a9f96230ffb0fa050be7026671335226336e935d..86664d4f3b69508a35c3cb20abf59cd138c70f1d 100644 --- a/python/paddle/fluid/tests/unittests/test_trainable.py +++ b/python/paddle/fluid/tests/unittests/test_trainable.py @@ -22,8 +22,8 @@ import paddle.fluid as fluid def test_trainable(): - x = fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') + x = paddle.static.data(name='image', shape=[-1, 784], dtype='float32') + label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64') feature = paddle.static.nn.fc( x, size=10, weight_attr=fluid.ParamAttr(trainable=False) ) diff --git 
a/python/paddle/fluid/tests/unittests/test_transpose_op.py b/python/paddle/fluid/tests/unittests/test_transpose_op.py index ad57289ffd3ebbbbceb13fea7f3099e1e26a57c9..a2f922dcd8db427b6c70666ca1b724fb9aa858a5 100644 --- a/python/paddle/fluid/tests/unittests/test_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_transpose_op.py @@ -21,7 +21,6 @@ from decorator_helper import prog_scope import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers from paddle.fluid import Program, program_guard from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 @@ -289,7 +288,9 @@ class TestTransposeOpError(unittest.TestCase): def test_errors(self): paddle.enable_static() with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[10, 5, 3], dtype='float64') + x = paddle.static.data( + name='x', shape=[-1, 10, 5, 3], dtype='float64' + ) def test_x_Variable_check(): # the Input(x)'s type must be Variable @@ -299,8 +300,8 @@ class TestTransposeOpError(unittest.TestCase): def test_x_dtype_check(): # the Input(x)'s dtype must be one of [bool, float16, float32, float64, int32, int64] - x1 = fluid.layers.data( - name='x1', shape=[10, 5, 3], dtype='int8' + x1 = paddle.static.data( + name='x1', shape=[-1, 10, 5, 3], dtype='int8' ) paddle.transpose(x1, perm=[1, 0, 2]) @@ -520,7 +521,7 @@ class TestTransposeDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 3, 4], False, dtype) + data = paddle.static.data('data', [2, 3, 4], dtype) data.persistable = True out = paddle.transpose(data, [1, 0, 2]) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -551,7 +552,7 @@ class TestTransposeTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 3, 4], False, dtype) + data = paddle.static.data('data', [2, 3, 4], dtype) data.persistable = True out = paddle.transpose(data, [1, 0, 2]) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_tril_indices_op.py b/python/paddle/fluid/tests/unittests/test_tril_indices_op.py index 6c473f2a8a817fec3a8555a946f26764d0055a03..a3932988cd25cd00c5df50f6da7378506ba80ad7 100644 --- a/python/paddle/fluid/tests/unittests/test_tril_indices_op.py +++ b/python/paddle/fluid/tests/unittests/test_tril_indices_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -24,6 +24,7 @@ import paddle.fluid as fluid class TestTrilIndicesOp(OpTest): def setUp(self): self.op_type = "tril_indices" + self.python_api = paddle.tril_indices self.inputs = {} self.init_config() self.outputs = {'out': self.target} diff --git a/python/paddle/fluid/tests/unittests/test_triu_indices_op.py b/python/paddle/fluid/tests/unittests/test_triu_indices_op.py index a9e391d6adf2612f72dc2c492d8fb2ab8bb6c332..b610fbaafdb08bf82aa0acb92b1a47f69170e066 100644 --- a/python/paddle/fluid/tests/unittests/test_triu_indices_op.py +++ b/python/paddle/fluid/tests/unittests/test_triu_indices_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -24,6 +24,7 @@ import paddle.fluid as fluid class TestTriuIndicesOp(OpTest): def setUp(self): self.op_type = "triu_indices" + self.python_api = paddle.triu_indices self.inputs = {} self.init_config() self.outputs = 
{'out': self.target} diff --git a/python/paddle/fluid/tests/unittests/test_unfold_op.py b/python/paddle/fluid/tests/unittests/test_unfold_op.py index 3deb20ed91f5f3b3eb0641ddf40d4dc90d48eefd..04ee20d90abbda2d66d89a7bd93cd2b2d31c62ac 100644 --- a/python/paddle/fluid/tests/unittests/test_unfold_op.py +++ b/python/paddle/fluid/tests/unittests/test_unfold_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -123,6 +123,7 @@ class TestUnfoldOp(OpTest): def setUp(self): self.op_type = 'unfold' + self.python_api = paddle.nn.functional.unfold self.set_data() def test_check_output(self): @@ -139,6 +140,7 @@ class TestUnfoldAPI(TestUnfoldOp): def setUp(self): self.op_type = 'unfold' + self.python_api = paddle.nn.functional.unfold self.set_data() self.places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py index e86dec9899af2ebe66f7bdc9a0a2c4e34aa42031..c31d763dbff7c50d085d8d73bc93ab7ec8e4e089 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py @@ -280,7 +280,9 @@ class TestUniformRandomOpSelectedRowsWithDiagInit( class TestUniformRandomOpApi(unittest.TestCase): def test_api(self): paddle.seed(10) - x = fluid.layers.data('x', shape=[16], dtype='float32', lod_level=1) + x = paddle.static.data( + 'x', shape=[-1, 16], dtype='float32', lod_level=1 + ) y = paddle.static.nn.fc( x, size=16, @@ -467,16 +469,16 @@ class TestUniformRandomBatchSizeLikeOpError(unittest.TestCase): self.assertRaises(TypeError, test_Variable) def test_shape(): - x1 = fluid.layers.data( - name='x2', shape=[100, 784], dtype='float32' + x1 = paddle.static.data( + name='x2', shape=[-1, 100, 784], dtype='float32' ) random.uniform_random_batch_size_like(x1, shape="shape") self.assertRaises(TypeError, test_shape) def test_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[100, 784], dtype='float32' + x2 = paddle.static.data( + name='x2', shape=[-1, 100, 784], dtype='float32' ) random.uniform_random_batch_size_like(x2, 'int32') @@ -516,8 +518,8 @@ class TestUniformOpError(unittest.TestCase): self.assertRaises(TypeError, test_Variable2) def test_dtype(): - x2 = fluid.layers.data( - name='x2', shape=[100, 784], dtype='float32' + x2 = paddle.static.data( + name='x2', shape=[-1, 100, 784], dtype='float32' ) paddle.tensor.random.uniform(x2, 'int32') diff --git a/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py index 44fd0888f6b63298be9f7d11e6beab7f79c47e78..fdb68a27795c2064c40793c0925d00c2315aa3bb 100755 --- a/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py +++ b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py @@ -22,7 +22,6 @@ from op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid as fluid import paddle.fluid.core as core -import paddle.fluid.layers as layers paddle.enable_static() @@ -329,7 +328,7 @@ class TestUnsqueezeDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 3, 4], False, dtype) + data = paddle.static.data('data', [2, 3, 4], dtype) data.persistable = True out = paddle.unsqueeze(data, [0, 2]) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) @@ -360,7 +359,7 @@ class 
TestUnsqueezeTripleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 - data = layers.data('data', [2, 3, 4], False, dtype) + data = paddle.static.data('data', [2, 3, 4], dtype) data.persistable = True out = paddle.unsqueeze(data, [0, 2]) data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py index 8520cf9067c05cce8a1f943f34d418e9b2ff6aae..94c098967e44597856f077dacd47561498eb2e13 100644 --- a/python/paddle/fluid/tests/unittests/test_variable.py +++ b/python/paddle/fluid/tests/unittests/test_variable.py @@ -168,7 +168,7 @@ class TestVariable(unittest.TestCase): var14 = var[1:-1, 0:2, ::-1] var15 = var[::-1, ::-1, ::-1] - x = fluid.layers.data(name='x', shape=[13], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 13], dtype='float32') y = paddle.static.nn.fc(x, size=1, activation=None) y_1 = y[:, 0] feeder = fluid.DataFeeder(place=place, feed_list=[x]) diff --git a/python/paddle/fluid/tests/unittests/test_weight_decay.py b/python/paddle/fluid/tests/unittests/test_weight_decay.py index e125b5876f9ae7e635c6b7677b9d7164f26eb0f7..9a36be6173b3833037d678a1d72723a132fbfd1f 100644 --- a/python/paddle/fluid/tests/unittests/test_weight_decay.py +++ b/python/paddle/fluid/tests/unittests/test_weight_decay.py @@ -147,10 +147,12 @@ class TestWeightDecay(unittest.TestCase): startup_prog = fluid.framework.Program() startup_prog.random_seed = 1 with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog): - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1 + data = paddle.static.data( + name="words", shape=[-1, 1], dtype="int64", lod_level=1 + ) + label = paddle.static.data( + name="label", shape=[-1, 1], dtype="int64" ) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") avg_cost = model(data, label, len(self.word_dict)) param_list = [ diff --git a/python/paddle/fluid/tests/unittests/test_weight_normalization.py b/python/paddle/fluid/tests/unittests/test_weight_normalization.py index e57e3ef9f0b767a7395e9794b42d3d37466f2b73..17a05bdb01caab7e930ad37e8c703e0ea4f557fe 100644 --- a/python/paddle/fluid/tests/unittests/test_weight_normalization.py +++ b/python/paddle/fluid/tests/unittests/test_weight_normalization.py @@ -35,8 +35,8 @@ class TestWeightNormalization(unittest.TestCase): @classmethod def set_program(cls): - data = fluid.layers.data( - name=cls.data_desc[0][0], shape=cls.data_desc[0][1] + data = paddle.static.data( + name=cls.data_desc[0][0], shape=[-1] + cls.data_desc[0][1] ) out = paddle.static.nn.fc( x=data, diff --git a/python/paddle/fluid/tests/unittests/test_where_op.py b/python/paddle/fluid/tests/unittests/test_where_op.py index 34540f64e249c3ade8114e2f01e580a418e46cb9..32ee83614b7765b7c00ec405264651fc40e589e6 100644 --- a/python/paddle/fluid/tests/unittests/test_where_op.py +++ b/python/paddle/fluid/tests/unittests/test_where_op.py @@ -78,17 +78,22 @@ class TestWhereAPI(unittest.TestCase): for x_stop_gradient in [False, True]: for y_stop_gradient in [False, True]: with fluid.program_guard(Program(), Program()): - cond = fluid.layers.data( - name='cond', shape=self.shape, dtype='bool' + cond = paddle.static.data( + name='cond', shape=[-1] + self.shape, dtype='bool' ) - x = fluid.layers.data( - name='x', shape=self.shape, dtype='float32' + cond.desc.set_need_check_feed(False) + x = paddle.static.data( + name='x', shape=[-1] + self.shape, dtype='float32' ) - y = fluid.layers.data( - name='y', 
shape=self.shape, dtype='float32' + x.desc.set_need_check_feed(False) + y = paddle.static.data( + name='y', shape=[-1] + self.shape, dtype='float32' ) + y.desc.set_need_check_feed(False) x.stop_gradient = x_stop_gradient + x.desc.set_need_check_feed(False) y.stop_gradient = y_stop_gradient + y.desc.set_need_check_feed(False) result = paddle.where(cond, x, y) append_backward(paddle.mean(result)) for use_cuda in [False, True]: @@ -127,8 +132,10 @@ class TestWhereAPI(unittest.TestCase): def test_api_broadcast(self, use_cuda=False): main_program = Program() with fluid.program_guard(main_program): - x = fluid.layers.data(name='x', shape=[4, 1], dtype='float32') - y = fluid.layers.data(name='y', shape=[4, 2], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 4, 1], dtype='float32') + x.desc.set_need_check_feed(False) + y = paddle.static.data(name='y', shape=[-1, 4, 2], dtype='float32') + y.desc.set_need_check_feed(False) x_i = np.array([[0.9383, 0.1983, 3.2, 1.2]]).astype('float32') y_i = np.array([[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]]).astype( 'float32' @@ -151,9 +158,10 @@ class TestWhereAPI(unittest.TestCase): main_program = Program() with fluid.program_guard(main_program): cond_shape = [2, 4] - cond = fluid.layers.data( - name='cond', shape=cond_shape, dtype='bool' + cond = paddle.static.data( + name='cond', shape=[-1] + cond_shape, dtype='bool' ) + cond.desc.set_need_check_feed(False) x_data = 1.0 y_data = 2.0 cond_data = np.array([False, False, True, True]).astype('bool') @@ -175,11 +183,18 @@ class TestWhereAPI(unittest.TestCase): paddle.enable_static() main_program = Program() with fluid.program_guard(main_program): - cond = fluid.layers.data( - name='cond', shape=cond_shape, dtype='bool' + cond = paddle.static.data( + name='cond', shape=[-1] + cond_shape, dtype='bool' ) - x = fluid.layers.data(name='x', shape=x_shape, dtype='float32') - y = fluid.layers.data(name='y', shape=y_shape, dtype='float32') + x = paddle.static.data( + name='x', shape=[-1] + x_shape, dtype='float32' + ) + y = paddle.static.data( + name='y', shape=[-1] + y_shape, dtype='float32' + ) + x.desc.set_need_check_feed(False) + y.desc.set_need_check_feed(False) + cond.desc.set_need_check_feed(False) cond_data_tmp = np.random.random(size=cond_shape).astype('float32') cond_data = cond_data_tmp < 0.3 x_data = np.random.random(size=x_shape).astype('float32') @@ -330,7 +345,8 @@ class TestWhereDygraphAPI(unittest.TestCase): def test_where_condition(self): data = np.array([[True, False], [False, True]]) with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[(-1), 2]) + x = paddle.static.data(name='x', shape=[(-1), 2], dtype='float32') + x.desc.set_need_check_feed(False) y = paddle.where(x) self.assertEqual(type(y), tuple) self.assertEqual(len(y), 2) @@ -343,7 +359,8 @@ class TestWhereDygraphAPI(unittest.TestCase): np.testing.assert_allclose(expect_out, np.array(res), rtol=1e-05) data = np.array([True, True, False]) with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[(-1)]) + x = paddle.static.data(name='x', shape=[(-1)], dtype='float32') + x.desc.set_need_check_feed(False) y = paddle.where(x) self.assertEqual(type(y), tuple) self.assertEqual(len(y), 1) @@ -369,9 +386,14 @@ class TestWhereOpError(unittest.TestCase): self.assertRaises(TypeError, test_Variable) def test_type(): - x = fluid.layers.data(name='x', shape=[4], dtype='bool') - y = fluid.layers.data(name='y', shape=[4], dtype='float16') - cond = fluid.layers.data(name='cond', shape=[4], 
dtype='int32') + x = paddle.static.data(name='x', shape=[-1, 4], dtype='bool') + x.desc.set_need_check_feed(False) + y = paddle.static.data(name='y', shape=[-1, 4], dtype='float16') + y.desc.set_need_check_feed(False) + cond = paddle.static.data( + name='cond', shape=[-1, 4], dtype='int32' + ) + cond.desc.set_need_check_feed(False) paddle.where(cond, x, y) self.assertRaises(TypeError, test_type) diff --git a/python/paddle/fluid/tests/unittests/test_while_loop_op.py b/python/paddle/fluid/tests/unittests/test_while_loop_op.py index c5c31ac5717606214dac397c5734c014f3e05e77..c0a5b6a8cb9d3c47e28349a08666910bd20ac232 100644 --- a/python/paddle/fluid/tests/unittests/test_while_loop_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_loop_op.py @@ -608,7 +608,7 @@ class TestApiWhileLoopSliceInBody(unittest.TestCase): main_program = Program() startup_program = Program() with program_guard(main_program, startup_program): - x = fluid.layers.data(name='x', shape=[5], dtype='int32') + x = paddle.static.data(name='x', shape=[-1, 5], dtype='int32') z = fluid.layers.fill_constant([1], 'int32', 0) x_shape = paddle.shape(x) i = fluid.layers.fill_constant([1], 'int32', 0) diff --git a/python/paddle/fluid/tests/unittests/test_while_op.py b/python/paddle/fluid/tests/unittests/test_while_op.py index 64b1ad2125b565163d44f8d14f0dc685b1470788..a78dbe6b7ea70577bda55e61ac4887c8ede45a3f 100644 --- a/python/paddle/fluid/tests/unittests/test_while_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_op.py @@ -28,15 +28,9 @@ paddle.enable_static() class TestWhileOp(unittest.TestCase): def simple_net(self): - d0 = layers.data( - "d0", shape=[10], append_batch_size=False, dtype='float32' - ) - d1 = layers.data( - "d1", shape=[10], append_batch_size=False, dtype='float32' - ) - d2 = layers.data( - "d2", shape=[10], append_batch_size=False, dtype='float32' - ) + d0 = paddle.static.data("d0", shape=[10], dtype='float32') + d1 = paddle.static.data("d1", shape=[10], dtype='float32') + d2 = paddle.static.data("d2", shape=[10], dtype='float32') i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True init = layers.zeros(shape=[10], dtype='float32') @@ -151,8 +145,10 @@ class TestIgnoreVarNameInWhile(unittest.TestCase): i = i + 1 return [i, ten, batch_info, origin_seq] - x = fluid.layers.data(name='x', shape=[-1, 1, 4]) - y = fluid.layers.data(name='y', shape=[-1, 1, 1]) + x = paddle.static.data(name='x', shape=[-1, 1, 4], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 1, 1], dtype='float32') + x.desc.set_need_check_feed(False) + y.desc.set_need_check_feed(False) temp = layers.concat(input=[x, y], axis=-1) i = layers.fill_constant(shape=[1], value=0, dtype='int32') num = layers.fill_constant(shape=[1], value=5, dtype='int32') @@ -207,7 +203,7 @@ class TestOutputsMustExistsInputs(unittest.TestCase): return s paddle.enable_static() - x = paddle.static.data(shape=[-1], name='x') + x = paddle.static.data(shape=[-1], name='x', dtype='float32') func(x) for op in main_program.block(0).ops: if op.type == "while": diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py index e18c0bec99be0e08ffca8457f85611df12b2c00a..11d85b52446b2c09537cbeb642059240ed0a018a 100644 --- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py @@ -18,7 +18,6 @@ import numpy as np from decorator_helper import prog_scope import paddle -import paddle.fluid as fluid import 
paddle.nn.functional as F unary_api_list = [ @@ -100,8 +99,8 @@ class TestUnaryAPI(unittest.TestCase): for api in unary_api_list: x = paddle.rand([]) x.stop_gradient = False - x.retain_grads() out = api(x) + out.retain_grads() out.backward() @@ -123,10 +122,12 @@ class TestUnaryAPI(unittest.TestCase): paddle.enable_static() for api in unary_api_list: - main_prog = fluid.Program() + main_prog = paddle.static.Program() block = main_prog.global_block() exe = paddle.static.Executor() - with fluid.program_guard(main_prog, fluid.Program()): + with paddle.static.program_guard( + main_prog, paddle.static.Program() + ): x = paddle.rand([]) x.stop_gradient = False out = api(x) @@ -202,29 +203,34 @@ class TestReduceAPI(unittest.TestCase): else: x = paddle.rand([]) x.stop_gradient = False - x.retain_grads() out = api(x, None) + out.retain_grads() out.backward() + out_empty_list = api(x, []) + self.assertEqual(out_empty_list, out) + self.assertEqual(x.shape, []) self.assertEqual(out.shape, []) - self.assertEqual(out.numpy(), x.numpy()) + np.testing.assert_allclose(out.numpy(), x.numpy()) if x.grad is not None: self.assertEqual(x.grad.shape, []) - self.assertEqual(x.grad.numpy(), 1.0) self.assertEqual(out.grad.shape, []) - self.assertEqual(out.grad.numpy(), 1.0) + np.testing.assert_allclose(x.grad.numpy(), np.array(1.0)) + np.testing.assert_allclose(out.grad.numpy(), np.array(1.0)) paddle.enable_static() def test_static_reduce(self): paddle.enable_static() for api in reduce_api_list: - main_prog = fluid.Program() + main_prog = paddle.static.Program() block = main_prog.global_block() exe = paddle.static.Executor() - with fluid.program_guard(main_prog, fluid.Program()): + with paddle.static.program_guard( + main_prog, paddle.static.Program() + ): # 1) x is 0D if api in [paddle.all, paddle.any]: x = paddle.randint(0, 2, []).astype('bool') @@ -234,6 +240,9 @@ class TestReduceAPI(unittest.TestCase): out = api(x, None) paddle.static.append_backward(out.sum()) + out_empty_list = api(x, None) + self.assertEqual(out_empty_list.shape, ()) + fetch_list = [x, out] if block.has_var(x.grad_name): fetch_list.extend([x.grad_name, out.grad_name]) @@ -241,12 +250,12 @@ class TestReduceAPI(unittest.TestCase): res = exe.run(main_prog, fetch_list=fetch_list) self.assertEqual(res[0].shape, ()) self.assertEqual(res[1].shape, ()) - self.assertEqual(res[0], res[1]) + np.testing.assert_allclose(res[0], res[1]) if len(res) > 2: self.assertEqual(res[2].shape, ()) self.assertEqual(res[3].shape, ()) - self.assertEqual(res[2], 1.0) - self.assertEqual(res[3], 1.0) + np.testing.assert_allclose(res[2], np.array(1.0)) + np.testing.assert_allclose(res[3], np.array(1.0)) paddle.disable_static() @@ -293,8 +302,6 @@ class TestBinaryAPI(unittest.TestCase): y = paddle.rand([]) x.stop_gradient = False y.stop_gradient = False - x.retain_grads() - y.retain_grads() if isinstance(api, dict): out = api['func'](x, y) out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y) @@ -318,8 +325,6 @@ class TestBinaryAPI(unittest.TestCase): y = paddle.rand([]) x.stop_gradient = False y.stop_gradient = False - x.retain_grads() - y.retain_grads() if isinstance(api, dict): out = api['func'](x, y) out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y) @@ -341,8 +346,6 @@ class TestBinaryAPI(unittest.TestCase): # 3) x is 0D , y is ND x = paddle.rand([]) y = paddle.rand([2, 3, 4]) - x.retain_grads() - y.retain_grads() x.stop_gradient = False y.stop_gradient = False if isinstance(api, dict): @@ -366,10 +369,10 @@ class TestBinaryAPI(unittest.TestCase): # 4) x is 
0D , y is scalar x = paddle.rand([]) x.stop_gradient = False - x.retain_grads() y = 0.5 if isinstance(api, dict): out = getattr(paddle.Tensor, api['cls_method'])(x, y) + out.retain_grads() out.backward() @@ -403,9 +406,11 @@ class TestBinaryAPI(unittest.TestCase): def test_static_binary(self): paddle.enable_static() for api in binary_api_list: - main_prog = fluid.Program() + main_prog = paddle.static.Program() block = main_prog.global_block() - with fluid.program_guard(main_prog, fluid.Program()): + with paddle.static.program_guard( + main_prog, paddle.static.Program() + ): # 1) x is 0D, y is 0D x = paddle.rand([]) y = paddle.rand([]) @@ -511,8 +516,10 @@ class TestBinaryAPI(unittest.TestCase): ''' for api in binary_int_api_list: - main_prog = fluid.Program() - with fluid.program_guard(main_prog, fluid.Program()): + main_prog = paddle.static.Program() + with paddle.static.program_guard( + main_prog, paddle.static.Program() + ): # 1) x is 0D, y is 0D x = paddle.randint(-10, 10, []) y = paddle.randint(-10, 10, []) @@ -541,10 +548,43 @@ class TestSundryAPI(unittest.TestCase): paddle.disable_static() self.x = paddle.rand([]) + def test_quantile(self): + # 1) x is 0D + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.quantile(x, 0.5, axis=None) + + out.retain_grads() + out.backward() + + out_empty_list = paddle.quantile(x, 0.5, axis=[]) + self.assertEqual(out_empty_list, out) + + self.assertEqual(x.shape, []) + self.assertEqual(out.shape, []) + self.assertEqual(out, x) + + self.assertEqual(x.grad.shape, []) + self.assertEqual(x.grad, 1.0) + self.assertEqual(out.grad.shape, []) + self.assertEqual(out.grad, 1.0) + + # 2) x is ND + x = paddle.rand([2, 3]) + x.stop_gradient = False + out = paddle.quantile(x, 0.5, axis=None) + + out.retain_grads() + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(out.grad.shape, []) + self.assertEqual(out.grad, 1.0) + self.assertEqual(x.grad.shape, [2, 3]) + def test_flip(self): x = paddle.rand([]) x.stop_gradient = False - x.retain_grads() out = paddle.flip(x, axis=[]) out.retain_grads() out.backward() @@ -636,7 +676,6 @@ class TestSundryAPI(unittest.TestCase): def test_pow_factor(self): x = paddle.rand([]) x.stop_gradient = False - x.retain_grads() out = paddle.pow(x, 2.0) out.retain_grads() out.backward() @@ -648,7 +687,6 @@ class TestSundryAPI(unittest.TestCase): def test_cast(self): x = paddle.full([], 1.0, 'float32') x.stop_gradient = False - x.retain_grads() out = paddle.cast(x, 'int32') out.retain_grads() out.backward() @@ -660,7 +698,6 @@ class TestSundryAPI(unittest.TestCase): def test_cumprod(self): x = paddle.full([], 1.0, 'float32') x.stop_gradient = False - x.retain_grads() out = paddle.cumprod(x, 0) out.retain_grads() out.backward() @@ -675,7 +712,6 @@ class TestSundryAPI(unittest.TestCase): def test_clip(self): x = paddle.uniform([], None, -10, 10) x.stop_gradient = False - x.retain_grads() out = paddle.clip(x, -5, 5) out.retain_grads() out.backward() @@ -687,7 +723,6 @@ class TestSundryAPI(unittest.TestCase): def test_increment(self): x = paddle.rand([]) x.stop_gradient = False - x.retain_grads() out = paddle.increment(x, 1.0) out.retain_grads() out.backward() @@ -711,18 +746,49 @@ class TestSundryAPI(unittest.TestCase): self.assertEqual(out.shape, []) def test_searchsorted(self): + # have no backward x = paddle.to_tensor([1, 3, 5, 7, 9]) y = paddle.rand([]) - # only has forward kernel out = paddle.searchsorted(x, y) self.assertEqual(out.shape, []) self.assertEqual(out.numpy(), 0) + def test_transpose(self): + x = 
paddle.rand([]) + x.stop_gradient = False + out = paddle.transpose(x, []) + out.retain_grads() + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(out, x) + self.assertEqual(out.grad.shape, []) + self.assertEqual(x.grad.shape, []) + self.assertEqual(x.grad, 1.0) + + with self.assertRaises(ValueError): + x = paddle.transpose(x, [0]) + + def test_moveaxis(self): + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.moveaxis(x, [], []) + out.retain_grads() + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(out, x) + self.assertEqual(out.grad.shape, []) + self.assertEqual(x.grad.shape, []) + self.assertEqual(x.grad, 1.0) + + with self.assertRaises(AssertionError): + x = paddle.moveaxis(x, [1], [0]) + def test_gather_1D(self): x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False) - x.retain_grads() index = paddle.full([], 2, 'int64') out = paddle.gather(x, index) out.retain_grads() @@ -737,7 +803,6 @@ class TestSundryAPI(unittest.TestCase): x = paddle.to_tensor( [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False ) - x.retain_grads() index = paddle.full([], 1, 'int64') out = paddle.gather(x, index) out.retain_grads() @@ -752,7 +817,6 @@ class TestSundryAPI(unittest.TestCase): x = paddle.to_tensor( [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False ) - x.retain_grads() index = paddle.full([], 1, 'int64') out = paddle.gather(x, index, axis=1) out.retain_grads() @@ -763,9 +827,8 @@ class TestSundryAPI(unittest.TestCase): self.assertEqual(x.grad.shape, [2, 3]) self.assertEqual(out.grad.shape, [2]) - def test_scatter_1D(self): + def _test_scatter_1D(self): x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False) - x.retain_grads() index = paddle.full([], 2, 'int64') updates = paddle.full([], 4.0) out = paddle.scatter(x, index, updates) @@ -776,7 +839,7 @@ class TestSundryAPI(unittest.TestCase): self.assertEqual(out.numpy()[2], 4) self.assertEqual(out.grad.shape, [5]) - def test_scatter_XD(self): + def _test_scatter_XD(self): x = paddle.to_tensor( [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False ) @@ -866,7 +929,6 @@ class TestSundryAPI(unittest.TestCase): x = paddle.randn(()) x.stop_gradient = False - x.retain_grads() out = paddle.kthvalue(x, 1) out[0].backward() @@ -887,7 +949,6 @@ class TestSundryAPI(unittest.TestCase): paddle.set_device(place) x = paddle.randn(()) - x.retain_grads() x.stop_gradient = False out = paddle.mode(x) @@ -904,7 +965,6 @@ class TestSundryAPI(unittest.TestCase): def test_flatten(self): x = paddle.rand([]) x.stop_gradient = False - x.retain_grads() start_axis = 0 stop_axis = -1 @@ -925,8 +985,8 @@ class TestSundryAPI(unittest.TestCase): def test_scale(self): x = paddle.rand([]) x.stop_gradient = False - x.retain_grads() out = paddle.scale(x, scale=2.0, bias=1.0) + out.retain_grads() out.backward() @@ -1018,9 +1078,8 @@ class TestSundryAPI(unittest.TestCase): def test_reshape_list(self): x = paddle.rand([]) x.stop_gradient = False - x.retain_grads() - out = paddle.reshape(x, []) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, []) @@ -1050,10 +1109,9 @@ class TestSundryAPI(unittest.TestCase): def test_reshape_tensor(self): x = paddle.rand([1, 1]) - x.retain_grads() x.stop_gradient = False - out = paddle.reshape(x, []) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, [1, 1]) @@ -1237,7 +1295,6 @@ class TestSundryAPI(unittest.TestCase): x = paddle.randn(()) x.stop_gradient = False - x.retain_grads() out = paddle.repeat_interleave(x, 2, None) 
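# A minimal sketch of the dygraph zero-dim (0-D) test pattern that the hunks in
# TestUnaryAPI / TestBinaryAPI / TestSundryAPI converge on: the input is a 0-D
# tensor created with paddle.rand([]), retain_grads() is now called on the
# non-leaf *output* (the input is a leaf and keeps its grad without it), and
# every shape is asserted to be the empty list []. paddle.tanh stands in here
# for any of the APIs under test; this is an illustration, not one of the
# actual test cases.
import paddle

x = paddle.rand([])                 # 0-D tensor, shape []
x.stop_gradient = False
out = paddle.tanh(x)
out.retain_grads()                  # grad is retained on the output now
out.backward()

assert out.shape == []              # output stays 0-D
assert x.grad.shape == []           # so does the gradient w.r.t. the input
assert out.grad.shape == []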
out.backward() @@ -1345,6 +1402,23 @@ class TestSundryAPI(unittest.TestCase): self.assertEqual(x.grad.shape, []) self.assertEqual(x.grad.numpy(), 1) + def test_unsqueeze(self): + x1 = paddle.full([], 2) + x1.stop_gradient = False + x1.retain_grads() + out1 = paddle.unsqueeze(x1, axis=0) + out1.retain_grads() + out1.backward() + self.assertEqual(out1.shape, [1]) + self.assertEqual(x1.grad.shape, []) + + x2 = paddle.full([], 0, dtype='int32') + out2 = paddle.unsqueeze(x1, axis=x2) + out2.retain_grads() + out2.backward() + self.assertEqual(out2.shape, [1]) + self.assertEqual(x1.grad.shape, []) + def test_t(self): x = paddle.full([], 2.0) x.stop_gradient = False @@ -1362,6 +1436,44 @@ class TestSundryAPIStatic(unittest.TestCase): paddle.enable_static() self.exe = paddle.static.Executor() + @prog_scope() + def test_quantile(self): + x1 = paddle.rand([]) + x1.stop_gradient = False + out1 = paddle.quantile(x1, 0.5, axis=None) + paddle.static.append_backward(out1.sum()) + + x2 = paddle.rand([2, 3]) + x2.stop_gradient = False + out2 = paddle.quantile(x2, 0.5, axis=None) + paddle.static.append_backward(out2.sum()) + + out_empty_list = paddle.quantile(x1, 0.5, axis=[]) + self.assertEqual(out_empty_list.shape, ()) + + prog = paddle.static.default_main_program() + res = self.exe.run( + prog, + fetch_list=[ + out1, + out2, + x1.grad_name, + out1.grad_name, + x2.grad_name, + out2.grad_name, + ], + ) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, ()) + + self.assertEqual(res[2].shape, ()) + self.assertEqual(res[3].shape, ()) + self.assertEqual(res[3], 1.0) + + self.assertEqual(res[4].shape, (2, 3)) + self.assertEqual(res[5].shape, ()) + self.assertEqual(res[5], 1.0) + @prog_scope() def test_flip(self): x = paddle.rand([]) @@ -1492,6 +1604,42 @@ class TestSundryAPIStatic(unittest.TestCase): self.assertEqual(res[0].shape, ()) self.assertEqual(res[0], 0) + @prog_scope() + def test_transpose(self): + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.transpose(x, []) + paddle.static.append_backward(out.sum()) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name]) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, ()) + self.assertEqual(res[1], 1.0) + self.assertEqual(res[2].shape, ()) + self.assertEqual(res[2], 1.0) + + with self.assertRaises(ValueError): + x = paddle.transpose(x, [0]) + + @prog_scope() + def test_moveaxis(self): + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.moveaxis(x, [], []) + paddle.static.append_backward(out.sum()) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name, out.grad_name]) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, ()) + self.assertEqual(res[1], 1.0) + self.assertEqual(res[2].shape, ()) + self.assertEqual(res[2], 1.0) + + with self.assertRaises(AssertionError): + x = paddle.moveaxis(x, [0], [1]) + @prog_scope() def test_gather_1D(self): x = paddle.full([10], 1.0, 'float32') @@ -2091,6 +2239,34 @@ class TestSundryAPIStatic(unittest.TestCase): self.assertEqual(res[3].shape, ()) self.assertEqual(res[3], 1) + @prog_scope() + def test_unsqueeze(self): + x1 = paddle.full([], 2) + out1 = paddle.unsqueeze(x1, axis=0) + x1.stop_gradient = False + paddle.static.append_backward(out1.sum()) + + x2 = paddle.full([], 3) + x3 = paddle.full([], 0, dtype='int32') + x2.stop_gradient = False + out2 = paddle.unsqueeze(x2, axis=x3) + paddle.static.append_backward(out2.sum()) + + prog = 
paddle.static.default_main_program() + res = self.exe.run( + prog, + fetch_list=[ + out1, + out2, + x1.grad_name, + x2.grad_name, + ], + ) + self.assertEqual(res[0].shape, (1,)) + self.assertEqual(res[1].shape, (1,)) + self.assertEqual(res[2].shape, ()) + self.assertEqual(res[3].shape, ()) + @prog_scope() def test_t(self): x = paddle.full([], 2.0) @@ -2106,6 +2282,21 @@ class TestSundryAPIStatic(unittest.TestCase): self.assertEqual(res[1].shape, ()) self.assertEqual(res[2].shape, ()) + @prog_scope() + def test_sequence_pad(self): + x = paddle.static.data("x", [-1, 2], dtype=paddle.int64, lod_level=1) + value = paddle.to_tensor(1000, dtype=paddle.int64).squeeze() + out = paddle.static.nn.sequence_pad(x, value) + + x_tensor = paddle.fluid.create_lod_tensor( + np.arange(20).astype(np.int64).reshape(-1, 2), + [[3, 3, 4]], + place=self.exe.place, + ) + prog = paddle.static.default_main_program() + res = self.exe.run(prog, feed={"x": x_tensor}, fetch_list=[out]) + self.assertEqual(res[0].shape, (3, 4, 2)) + # Use to test API whose zero-dim input tensors don't have grad and not need to test backward in OpTest. class TestNoBackwardAPI(unittest.TestCase): @@ -2254,6 +2445,29 @@ class TestNoBackwardAPI(unittest.TestCase): self.assertEqual(one_hot_label.shape, [4]) self.assertEqual(one_hot_label.numpy()[2], 1) + def test_unique(self): + places = ['cpu'] + if paddle.is_compiled_with_cuda(): + places.append('gpu') + for place in places: + paddle.set_device(place) + x = paddle.rand([]) + y, index, inverse, counts = paddle.unique( + x, + return_index=True, + return_inverse=True, + return_counts=True, + ) + + self.assertEqual(y, x) + self.assertEqual(index, 0) + self.assertEqual(inverse, 0) + self.assertEqual(counts, 1) + self.assertEqual(y.shape, [1]) + self.assertEqual(index.shape, [1]) + self.assertEqual(inverse.shape, [1]) + self.assertEqual(counts.shape, [1]) + class TestNoBackwardAPIStatic(unittest.TestCase): def setUp(self): @@ -2442,7 +2656,7 @@ class TestNoBackwardAPIStatic(unittest.TestCase): ids = paddle.full(shape=[], fill_value=1, dtype='int64') emb = paddle.static.nn.embedding(ids, (20, 3)) prog = paddle.static.default_main_program() - self.exe.run(paddle.fluid.default_startup_program()) + self.exe.run(paddle.static.default_startup_program()) res = self.exe.run(prog, fetch_list=[emb]) self.assertEqual(res[0].shape, (3,)) @@ -2450,12 +2664,201 @@ class TestNoBackwardAPIStatic(unittest.TestCase): label = paddle.full(shape=[], fill_value=2, dtype='int64') one_hot_label = paddle.nn.functional.one_hot(label, num_classes=4) prog = paddle.static.default_main_program() - self.exe.run(paddle.fluid.default_startup_program()) + self.exe.run(paddle.static.default_startup_program()) res = self.exe.run(prog, fetch_list=[one_hot_label]) self.assertEqual(res[0].shape, (4,)) self.assertEqual(res[0][2], 1) + def test_unique(self): + x = paddle.rand([]) + y, index, inverse, counts = paddle.unique( + x, return_index=True, return_inverse=True, return_counts=True + ) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[y, index, inverse, counts]) + self.assertEqual(y, x) + self.assertEqual(index, 0) + self.assertEqual(inverse, 0) + self.assertEqual(counts, 1) + self.assertEqual(res[0].shape, (1,)) + self.assertEqual(res[1].shape, (1,)) + self.assertEqual(res[2].shape, (1,)) + self.assertEqual(res[3].shape, (1,)) + + +unary_apis_with_complex_input = [ + paddle.real, + paddle.imag, + paddle.angle, + paddle.conj, +] + + +class 
TestUnaryElementwiseAPIWithComplexInput(unittest.TestCase): + def test_dygraph_unary(self): + paddle.disable_static() + for api in unary_apis_with_complex_input: + x = paddle.to_tensor(2.0 + 3.0j).squeeze() + x.stop_gradient = False + x.retain_grads() + out = api(x) + out.retain_grads() + out.backward() + + self.assertEqual(x.shape, []) + self.assertEqual(out.shape, []) + if x.grad is not None: + self.assertEqual(x.grad.shape, []) + self.assertEqual(out.grad.shape, []) + + paddle.enable_static() + + def test_static_unary(self): + paddle.enable_static() + + for api in unary_apis_with_complex_input: + main_prog = paddle.static.Program() + block = main_prog.global_block() + exe = paddle.static.Executor() + with paddle.static.program_guard( + main_prog, paddle.static.Program() + ): + # before full support for complex, we cannot create complex tensor with the same code as in dynamic graph + x = paddle.complex( + paddle.to_tensor(2.0), paddle.to_tensor(2.0) + ).squeeze() + x.stop_gradient = False + out = api(x) + # TODO(zhouwei): + # ScaleLossGradOp / append_backward set grad shape to [1] + # after output 0D, may change it to [] + # use out.sum() to avoid this two problem now + loss = out.sum() + paddle.static.append_backward(loss) + + fetch_list = [x, out] + if block.has_var(x.grad_name): + fetch_list.extend([x.grad_name, out.grad_name]) + + # 1) Test Program + res = exe.run(main_prog, fetch_list=fetch_list) + for item in res: + self.assertEqual(item.shape, ()) + + # 2) Test CompiledProgram Program + if paddle.device.is_compiled_with_cuda(): + places = [paddle.CUDAPlace(0)] + expect_shape = () + else: + places = [paddle.CPUPlace()] * 4 + expect_shape = (4,) + compile_prog = paddle.static.CompiledProgram( + main_prog + ).with_data_parallel(loss.name, places=places) + + # return_merged=False # + res = exe.run( + compile_prog, fetch_list=fetch_list, return_merged=False + ) + for item1 in res: + for item2 in item1: + self.assertEqual(item2.shape, ()) + + # return_merged=True # + res = exe.run( + compile_prog, fetch_list=fetch_list, return_merged=True + ) + for item in res: + self.assertEqual(item.shape, expect_shape) + + paddle.disable_static() + + +class TestAsReal(unittest.TestCase): + def test_dygraph(self): + paddle.disable_static() + for api in unary_apis_with_complex_input: + x = paddle.to_tensor(2.0 + 3.0j).squeeze() + x.stop_gradient = False + x.retain_grads() + out = paddle.as_real(x) + out.retain_grads() + out.backward() + + self.assertEqual(x.shape, []) + self.assertEqual(out.shape, [2]) + if x.grad is not None: + self.assertEqual(x.grad.shape, []) + self.assertEqual(out.grad.shape, [2]) + + paddle.enable_static() + + def test_static(self): + paddle.enable_static() + + for api in unary_apis_with_complex_input: + main_prog = paddle.static.Program() + block = main_prog.global_block() + exe = paddle.static.Executor() + with paddle.static.program_guard( + main_prog, paddle.static.Program() + ): + # before full support for complex, we cannot create complex tensor with the same code as in dynamic graph + x = paddle.complex( + paddle.to_tensor(2.0), paddle.to_tensor(2.0) + ).squeeze() + x.stop_gradient = False + out = paddle.as_real(x) + self.assertEqual(x.shape, ()) + self.assertEqual(out.shape, (2,)) + # TODO(zhouwei): + # ScaleLossGradOp / append_backward set grad shape to [1] + # after output 0D, may change it to [] + # use out.sum() to avoid this two problem now + loss = out.abs().sum() + paddle.static.append_backward(loss) + + fetch_list = [x, out] + if block.has_var(x.grad_name): + 
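# For reference, the shape relation these TestAsReal cases check: paddle.as_real
# views a complex tensor as a real tensor with a trailing dimension of 2 (real
# and imaginary parts), so a 0-D complex input yields a 1-D output of shape [2].
# A minimal dygraph sketch of that relation (illustrative only, mirroring the
# way the tests build the 0-D complex input):
import paddle

z = paddle.to_tensor(2.0 + 3.0j).squeeze()   # 0-D complex tensor, shape []
r = paddle.as_real(z)
assert z.shape == []
assert r.shape == [2]                        # [real part, imaginary part]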
fetch_list.extend([x.grad_name, out.grad_name]) + + # 1) Test Program + res = exe.run(main_prog, fetch_list=fetch_list) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, (2,)) + self.assertEqual(res[2].shape, ()) + self.assertEqual(res[3].shape, (2,)) + + # 2) Test CompiledProgram Program + if paddle.device.is_compiled_with_cuda(): + places = [paddle.CUDAPlace(0)] + expect_shapes = (), (2,), (), (2,) + else: + places = [paddle.CPUPlace()] * 4 + expect_shapes = (4,), (8,), (4,), (8,) + compile_prog = paddle.static.CompiledProgram( + main_prog + ).with_data_parallel(loss.name, places=places) + + # return_merged=False # + res = exe.run( + compile_prog, fetch_list=fetch_list, return_merged=False + ) + for out_i, expect in zip(res, [(), (2,), (), (2,)]): + for replica in out_i: + self.assertEqual(replica.shape, expect) + + # return_merged=True # + res = exe.run( + compile_prog, fetch_list=fetch_list, return_merged=True + ) + for actual, expect in zip(res, expect_shapes): + self.assertEqual(actual.shape, expect) + + paddle.disable_static() + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py index f4f755c131c9dc1492c473aa7d62dae6c8e94aac..42436b6e242b445bfceee93996c03dac7f13233b 100644 --- a/python/paddle/fluid/tests/unittests/transformer_model.py +++ b/python/paddle/fluid/tests/unittests/transformer_model.py @@ -499,11 +499,13 @@ def build_inputs(max_length, n_head): all_inputs = [] for name, shape, dtype in zip(names, shapes, dtypes): - all_inputs.append( - fluid.layers.data( - name=name, shape=shape, dtype=dtype, append_batch_size=False - ) + data_input = paddle.static.data( + name=name, + shape=shape, + dtype=dtype, ) + data_input.desc.set_need_check_feed(False) + all_inputs.append(data_input) return all_inputs diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_allgather_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/collective_allgather_op_xpu.py index 55304b8b40b4387fd8903f85fb977579c6f15f04..5cd5a92f4b2c4862d1d334eb6923fb08bd352d2f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/collective_allgather_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/collective_allgather_op_xpu.py @@ -16,7 +16,6 @@ from test_collective_base_xpu import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -30,8 +29,8 @@ class TestCollectiveAllGather(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) toutdata = main_prog.current_block().create_var( name="outofgather", diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_allreduce_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/collective_allreduce_op_xpu.py index 32196b77d258f045ba071a026e3f0ca1261bfab9..54b60f76665e3740b818d1bc50f03f17097b01e8 100644 --- a/python/paddle/fluid/tests/unittests/xpu/collective_allreduce_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/collective_allreduce_op_xpu.py @@ -18,7 +18,6 @@ from test_collective_base_xpu import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -31,8 +30,8 
@@ class TestCollectiveAllReduce(TestCollectiveRunnerBase): def get_model(self, main_prog, startup_program): ring_id = 0 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) toutdata = main_prog.current_block().create_var( name="outofreduce", diff --git a/python/paddle/fluid/tests/unittests/xpu/collective_identity_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/collective_identity_op_xpu.py index 5a17539cbfb88596556216c473f78f417e829a3b..8fea9d7a4ac0e5ff4831949376fbb3e2c2ff333a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/collective_identity_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/collective_identity_op_xpu.py @@ -16,7 +16,6 @@ from test_collective_base_xpu import TestCollectiveRunnerBase, runtime_main import paddle import paddle.fluid as fluid -import paddle.fluid.layers as layers from paddle.fluid import core paddle.enable_static() @@ -30,8 +29,8 @@ class TestCollectiveIdentity(TestCollectiveRunnerBase): ring_id = 0 nranks = 2 with fluid.program_guard(main_prog, startup_program): - tindata = layers.data( - name="tindata", shape=[10, 1000], dtype='float32' + tindata = paddle.static.data( + name="tindata", shape=[-1, 10, 1000], dtype='float32' ) toutdata = main_prog.current_block().create_var( name="outofgather", diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py index f503e3cd4f595a22d3181efeba24756b069a0633..4de36728504ac875bacfd4a135f02d7c1c7ab045 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py @@ -168,8 +168,12 @@ class XPUTestAdadelta(XPUOpTestWrapper): place = fluid.XPUPlace(0) main = fluid.Program() with fluid.program_guard(main): - x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype) - y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype) + x = paddle.static.data( + name='x', shape=[-1, 13], dtype=self.dtype + ) + y = paddle.static.data( + name='y', shape=[-1, 1], dtype=self.dtype + ) y_predict = paddle.static.nn.fc(x, size=1, activation=None) cost = paddle.nn.functional.square_error_cost( input=y_predict, label=y diff --git a/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py index 075ff7f7e8d9eb7eec3a38006c0505ad98f79e95..4bf88d40b7a98babc47fdb4a6a8f8e1c56e8c7a6 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
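# The recurring edit in these XPU unit tests is the migration from the
# deprecated fluid.layers.data to paddle.static.data. The old helper prepended
# a batch dimension by default (append_batch_size=True), so the replacement
# spells that dimension out as -1 and drops the append_batch_size argument.
# A minimal before/after sketch of the pattern (illustrative, assuming static
# graph mode is enabled):
import paddle

paddle.enable_static()

# before: tindata = fluid.layers.data(name="tindata", shape=[10, 1000], dtype='float32')
tindata = paddle.static.data(name="tindata", shape=[-1, 10, 1000], dtype='float32')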
@@ -27,7 +27,7 @@ from xpu.get_test_cover_info import ( import paddle import paddle.fluid as fluid -from paddle.fluid import Program, program_guard +from paddle.fluid import Program, core, program_guard class XPUTestClipOp(XPUOpTestWrapper): @@ -51,7 +51,7 @@ class XPUTestClipOp(XPUOpTestWrapper): def set_xpu(self): self.__class__.use_xpu = True - self.__class__.no_need_check_grad = True + self.__class__.no_need_check_grad = False self.__class__.op_type = self.dtype def init_data(self): @@ -91,6 +91,16 @@ class XPUTestClipOp(XPUOpTestWrapper): self.check_output_with_place(self.place) paddle.disable_static() + def test_check_grad(self): + if hasattr(self, "no_need_check_grad") and self.no_need_check_grad: + return + if core.is_compiled_with_xpu(): + paddle.enable_static() + self.check_grad_with_place( + self.place, ['X'], 'Out', check_eager=True + ) + paddle.disable_static() + class TestClipOp1(TestClipOp): def init_data(self): self.shape = (8, 16, 8) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py index 915fb249514a964523589c95245efed158cfa93c..1367b972dd26fc38fd628f9c7dc13c33a678be76 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py @@ -22,7 +22,6 @@ from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import XPUOpTestWrapper, create_test_class import paddle -import paddle.fluid as fluid def conv3d_forward_naive( @@ -567,17 +566,15 @@ class XPUTestConv3DOp_v2(XPUOpTestWrapper): class TestConv3DAPI(unittest.TestCase): def test_api(self): - input_NDHWC = fluid.layers.data( + input_NDHWC = paddle.static.data( name="input_NDHWC", shape=[2, 5, 5, 5, 3], - append_batch_size=False, dtype="float32", ) - input_NCDHW = fluid.layers.data( + input_NCDHW = paddle.static.data( name="input_NCDHW", shape=[2, 3, 5, 5, 3], - append_batch_size=False, dtype="float32", ) @@ -650,10 +647,9 @@ class TestConv3DAPI(unittest.TestCase): class TestConv3DAPI_Error(unittest.TestCase): def test_api(self): - input = fluid.layers.data( + input = paddle.static.data( name="input", shape=[2, 5, 5, 5, 4], - append_batch_size=False, dtype="float32", ) @@ -736,10 +732,9 @@ class TestConv3DAPI_Error(unittest.TestCase): self.assertRaises(ValueError, run_5) # ValueError: channel dimmention - x = fluid.layers.data( + x = paddle.static.data( name="x", shape=[2, 5, 5, 5, -1], - append_batch_size=False, dtype="float32", ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py index 011dd8fb9d88ef8280115f5a843f0e56c6642b16..fca45412fa61773dd06fb1ea0edef57a609a5337 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py @@ -141,8 +141,8 @@ class XPUTestDropoutOp(XPUOpTestWrapper): def test_dtype(): # the input dtype of dropout must be float16 or float32 or float64 # float16 only can be set on GPU place - x2 = fluid.layers.data( - name='x2', shape=[3, 4, 5, 6], dtype="int32" + x2 = paddle.static.data( + name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32" ) paddle.nn.functional.dropout(x2, p=0.5) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py index 9f7d39ca346aad367e855886eda60756ec244c30..8388b59a2df2b09297f56a5e15ddd2f35a038648 100644 --- 
a/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_expand_as_v2_op_xpu.py @@ -134,14 +134,11 @@ class TestExpandAsV2API(unittest.TestCase): def test_api(self): input1 = np.random.random([12, 14]).astype("float32") input2 = np.random.random([2, 12, 14]).astype("float32") - x = fluid.layers.data( - name='x', shape=[12, 14], append_batch_size=False, dtype="float32" - ) + x = paddle.static.data(name='x', shape=[12, 14], dtype="float32") - y = fluid.layers.data( + y = paddle.static.data( name='target_tensor', shape=[2, 12, 14], - append_batch_size=False, dtype="float32", ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py index 07039053dc0c6a99ca29b18c2d940e96cb0feb7a..0c4b4abc5cbc36728f96063d090049cd9dff64ab 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_expand_v2_op_xpu.py @@ -197,18 +197,16 @@ class TestExpandV2API(unittest.TestCase): def test_static(self): with fluid.program_guard(fluid.Program(), fluid.Program()): input = np.random.random([12, 14]).astype("float32") - x = fluid.layers.data( + x = paddle.static.data( name='x', shape=[12, 14], - append_batch_size=False, dtype="float32", ) positive_2 = fluid.layers.fill_constant([1], "int32", 12) - expand_shape = fluid.layers.data( + expand_shape = paddle.static.data( name="expand_shape", shape=[2], - append_batch_size=False, dtype="int32", ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_index_select_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_index_select_op_xpu.py index 85818e5a6a4ccc4b1763c072558279bc505fa20e..dde9ddb9168cc6073be1805a3be7804d645531a7 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_index_select_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_index_select_op_xpu.py @@ -94,7 +94,7 @@ class TestIndexSelectAPI(unittest.TestCase): [5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0], ] - ) + ).astype('float32') self.data_index = np.array([0, 1, 1]).astype('int32') def test_index_select_api(self): @@ -102,10 +102,8 @@ class TestIndexSelectAPI(unittest.TestCase): # case 1: with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 4]) - index = fluid.layers.data( - name='index', shape=[3], dtype='int32', append_batch_size=False - ) + x = paddle.static.data(name='x', shape=[-1, 4], dtype='float32') + index = paddle.static.data(name='index', shape=[3], dtype='int32') z = paddle.index_select(x, index, axis=1) exe = fluid.Executor(fluid.XPUPlace(0)) (res,) = exe.run( @@ -120,10 +118,8 @@ class TestIndexSelectAPI(unittest.TestCase): # case 2: with program_guard(Program(), Program()): - x = fluid.layers.data(name='x', shape=[-1, 4]) - index = fluid.layers.data( - name='index', shape=[3], dtype='int32', append_batch_size=False - ) + x = paddle.static.data(name='x', shape=[-1, 4], dtype='float32') + index = paddle.static.data(name='index', shape=[3], dtype='int32') z = paddle.index_select(x, index) exe = fluid.Executor(fluid.XPUPlace(0)) (res,) = exe.run( diff --git a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py index f0db43acb685b50fd05fd914738370426ceb47d4..94645bcf9b2a24ebf7fc14a2850a6c8880ac1e70 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py +++ 
b/python/paddle/fluid/tests/unittests/xpu/test_lookup_table_v2_op_xpu.py @@ -167,7 +167,7 @@ class TestLookupTableWithTensorIdsWIsSelectedRows( class TestLookupTableApi(unittest.TestCase): def test_api(self): - x = fluid.layers.data(name='x', shape=[20], dtype='int64') + x = paddle.static.data(name='x', shape=[-1, 20], dtype='int64') emb = fluid.embedding(input=x, size=[128, 64]) place = paddle.XPUPlace(0) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py index 3991862346bbae813f3d98fa8c5994b544031d3d..a13bea88b6a8068f83d1d58cdf52c56f743c15c9 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py @@ -17,11 +17,11 @@ import unittest import numpy as np +import paddle + sys.path.append("..") from op_test_xpu import XPUOpTest -import paddle -import paddle.fluid as fluid from paddle.fluid import Program, program_guard np.random.seed(10) @@ -91,12 +91,12 @@ class TestMeanOpError(unittest.TestCase): input1 = 12 self.assertRaises(TypeError, paddle.mean, input1) # The input dtype of mean_op must be float16, float32, float64. - input2 = fluid.layers.data( - name='input2', shape=[12, 10], dtype="int32" + input2 = paddle.static.data( + name='input2', shape=[-1, 12, 10], dtype="int32" ) self.assertRaises(TypeError, paddle.mean, input2) - input3 = fluid.layers.data( - name='input3', shape=[4], dtype="float16" + input3 = paddle.static.data( + name='input3', shape=[-1, 4], dtype="float16" ) paddle.nn.functional.softmax(input3) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py index dce5e263b9545fcd712522514ec6a98da11c274b..0a5752989e821bb6b664b84d19f5daa0f6228e02 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py @@ -167,7 +167,7 @@ class TestOneHotOpApi(unittest.TestCase): ) def _run(self, depth): - label = fluid.layers.data(name="label", shape=[1], dtype="int64") + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") one_hot_label = fluid.one_hot(input=label, depth=depth) place = fluid.XPUPlace(0) @@ -191,10 +191,9 @@ class BadInputTestOnehotV2(unittest.TestCase): with fluid.program_guard(fluid.Program()): def test_bad_x(): - label = fluid.layers.data( + label = paddle.static.data( name="label", shape=[4], - append_batch_size=False, dtype="float32", ) one_hot_label = fluid.one_hot(input=label, depth=4) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pool_max_op.py b/python/paddle/fluid/tests/unittests/xpu/test_pool_max_op.py new file mode 100644 index 0000000000000000000000000000000000000000..9d27dcc760502656f85ce309ace73d9900e45c91 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_pool_max_op.py @@ -0,0 +1,171 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import unittest + +import numpy as np + +sys.path.append("..") +from op_test_xpu import XPUOpTest +from xpu.get_test_cover_info import ( + XPUOpTestWrapper, + create_test_class, + get_xpu_op_support_types, +) + +import paddle + +paddle.enable_static() + + +def max_pool2D_forward_naive( + x, ksize, strides, paddings, global_pool=False, adaptive=False +): + + N, C, H, W = x.shape + global_pool = global_pool or (adaptive or (ksize[0] * ksize[1] == 1)) + if global_pool: + ksize = [H, W] + paddings = [0, 0] + + H_out = (H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 + W_out = (W - ksize[1] + 2 * paddings[1]) // strides[1] + 1 + out = np.zeros((N, C, H_out, W_out)) + mask = np.zeros((N, C, H_out, W_out)) + for i in range(H_out): + for j in range(W_out): + r0 = i * strides[0] - paddings[0] + r1 = r0 + ksize[0] + c0 = j * strides[1] - paddings[1] + c1 = c0 + ksize[1] + r_start = np.max((r0, 0)) + r_end = np.min((r1, H)) + c_start = np.max((c0, 0)) + c_end = np.min((c1, W)) + x_masked = x[:, :, r_start:r_end, c_start:c_end] + + out[:, :, i, j] = np.max(x_masked, axis=(2, 3)) + + for n in range(N): + for c in range(C): + arr = x_masked[n, c, :, :] + index = np.where(arr == np.max(arr)) + sub_row = index[0][-1] - r0 if r0 < 0 else index[0][-1] + sub_col = index[1][-1] - c0 if c0 < 0 else index[1][-1] + index = sub_row * (r1 - r0) + sub_col + mask[n, c, i, j] = index + + return out, mask + + +class XPUTestPoolWithIndex_op(XPUOpTestWrapper): + def __init__(self): + self.op_name = 'max_pool2d_with_index' + self.use_dynamic_create_class = False + + class TestMaxPoolWithIndex_Op(XPUOpTest): + def setUp(self): + self.op_type = 'max_pool2d_with_index' + self.dtype = self.in_type + self.place = paddle.XPUPlace(0) + self.init_test_case() + self.init_global() + self.init_adaptive() + + input = np.random.random(self.shape).astype(self.dtype) + input = np.round(input * 100.0, 2) + output, mask = self.pool_forward_naive( + input, + self.ksize, + self.strides, + self.paddings, + self.global_pool, + self.adaptive, + ) + output = output.astype(self.dtype) + mask = mask.astype("int32") + + self.attrs = { + 'strides': self.strides, + 'paddings': self.paddings, + 'ksize': self.ksize, + 'global_pooling': self.global_pool, + 'adaptive': self.adaptive, + } + + self.inputs = {'X': input} + self.outputs = {'Out': output, "Mask": mask} + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + self.check_grad_with_place(self.place, set(['X']), ['Out']) + + def init_test_case(self): + self.pool_forward_naive = max_pool2D_forward_naive + self.shape = [2, 3, 7, 7] + self.ksize = [3, 3] + self.strides = [2, 2] + self.paddings = [1, 1] + + def init_global(self): + self.global_pool = False + + def init_adaptive(self): + self.adaptive = False + + # TODO pool3d is not supported for now + # ----------------max_pool2d_with_index---------------- + class TestCase4(TestMaxPoolWithIndex_Op): + def init_test_case(self): + self.op_type = "max_pool2d_with_index" + self.pool_forward_naive = max_pool2D_forward_naive + self.shape = [2, 3, 7, 7] + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [1, 1] + + def init_global(self): + self.global_pool = True + + class TestCase5(TestCase4): + def init_global(self): + self.global_pool = False + + class TestCase6(TestMaxPoolWithIndex_Op): + def init_test_case(self): + self.op_type = "max_pool2d_with_index" + 
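# Worked example of the output-size arithmetic used by max_pool2D_forward_naive
# above:
#     H_out = (H - ksize + 2 * padding) // stride + 1
# For the base TestMaxPoolWithIndex_Op settings (input [2, 3, 7, 7],
# ksize [3, 3], strides [2, 2], paddings [1, 1], no global pooling):
H, k, s, p = 7, 3, 2, 1
H_out = (H - k + 2 * p) // s + 1
print(H_out)   # 4, i.e. the expected output (and mask) shape is [2, 3, 4, 4]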
self.pool_forward_naive = max_pool2D_forward_naive + self.shape = [2, 3, 7, 7] + self.ksize = [3, 3] + self.strides = [2, 2] + self.paddings = [0, 0] + + def init_global(self): + self.global_pool = True + + class TestCase7(TestCase6): + def init_global(self): + self.global_pool = False + + +support_types = get_xpu_op_support_types('max_pool2d_with_index') +for stype in support_types: + create_test_class(globals(), XPUTestPoolWithIndex_op, stype) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py index d8a1e9efcbcddd70f574058e1142687b405d41ce..2ffc6c2d2279751019be7c5ca37e6eeac901c510 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_reduce_sum_op_xpu.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -48,7 +48,7 @@ class XPUTestReduceSumOp(XPUOpTestWrapper): 'reduce_all': self.reduce_all, 'keep_dim': self.keep_dim, } - self.inputs = {'X': np.random.random(self.shape).astype("float32")} + self.inputs = {'X': np.random.random(self.shape).astype(self.dtype)} if self.attrs['reduce_all']: self.outputs = {'Out': self.inputs['X'].sum()} else: @@ -63,6 +63,7 @@ class XPUTestReduceSumOp(XPUOpTestWrapper): self.axis = (0,) self.reduce_all = False self.keep_dim = False + self.dtype = self.in_type def test_check_output(self): self.check_output_with_place(self.place) @@ -71,12 +72,47 @@ class XPUTestReduceSumOp(XPUOpTestWrapper): self.check_grad_with_place(self.place, ['X'], 'Out') class XPUTestReduceSumCase1(XPUTestReduceSumBase): + def init_case(self): + self.shape = (5, 6, 10) + self.axis = (0,) + self.reduce_all = False + self.keep_dim = False + + class XPUTestReduceSumCase2(XPUTestReduceSumBase): def init_case(self): self.shape = (5, 6, 10) self.axis = (0,) self.reduce_all = False self.keep_dim = True + class XPUTestReduceSumCase3(XPUTestReduceSumBase): + def init_case(self): + self.shape = (5, 6, 10) + self.axis = (0,) + self.reduce_all = True + self.keep_dim = False + + class XPUTestReduceSumCase4(XPUTestReduceSumBase): + def init_case(self): + self.shape = (5, 6, 10) + self.axis = (1,) + self.reduce_all = False + self.keep_dim = False + + class XPUTestReduceSumCase5(XPUTestReduceSumBase): + def init_case(self): + self.shape = (5, 6, 10) + self.axis = (1,) + self.reduce_all = False + self.keep_dim = True + + class XPUTestReduceSumCase6(XPUTestReduceSumBase): + def init_case(self): + self.shape = (5, 6, 10) + self.axis = (1,) + self.reduce_all = True + self.keep_dim = False + support_types = get_xpu_op_support_types('reduce_sum') for stype in support_types: diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py index c3e49d123241d76e7e7bb56be0876c0b0b74a69a..6c2fd3fc9fa6c1a5cfcbf97d963d232f45d8c580 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sequence_conv_op_xpu.py @@ -433,7 +433,7 @@ class TestSeqConvApi(unittest.TestCase): def test_api(self): import paddle.fluid as fluid - x = fluid.layers.data('x', shape=[32], lod_level=1) + x = paddle.static.data('x', 
shape=[-1, 32], lod_level=1) y = fluid.layers.sequence_conv( input=x, num_filters=2, filter_size=3, padding_start=None ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py index e2aa83126351137010f99a175ffd7c761643c6f1..c70f6f61f1390d1617c9a68f5008f24d92da970e 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_where_index_xpu.py @@ -101,7 +101,7 @@ for stype in support_types: class TestWhereOpError(unittest.TestCase): def test_api(self): with program_guard(Program(), Program()): - cond = fluid.layers.data(name='cond', shape=[4], dtype='bool') + cond = paddle.static.data(name='cond', shape=[-1, 4], dtype='bool') result = paddle.nonzero(cond) exe = fluid.Executor(paddle.XPUPlace(0)) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py index 4a080ba70cd967068ec3bb49fc53f8ad21667c9e..45aa192d72796b2bcce296667c4d01056115b2ce 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_where_op_xpu.py @@ -150,8 +150,8 @@ class TestXPUWhereAPI(unittest.TestCase): train_prog = fluid.Program() startup = fluid.Program() with fluid.program_guard(train_prog, startup): - x = fluid.layers.data(name='x', shape=[4, 1], dtype='float32') - y = fluid.layers.data(name='y', shape=[4, 2], dtype='float32') + x = paddle.static.data(name='x', shape=[-1, 4, 1], dtype='float32') + y = paddle.static.data(name='y', shape=[-1, 4, 2], dtype='float32') x_i = np.array([[0.9383, 0.1983, 3.2, 1.2]]).astype("float32") y_i = np.array([[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]]).astype( "float32" diff --git a/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py index 10e3a6fddcdae704c26d3a6ee5e041555402ae11..56b93e494523ffb3a52a19096ac583d9aa7c7439 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_while_op_xpu.py @@ -27,15 +27,9 @@ paddle.enable_static() class TestWhileOp(unittest.TestCase): def simple_net(self): - d0 = layers.data( - "d0", shape=[10], append_batch_size=False, dtype='float32' - ) - d1 = layers.data( - "d1", shape=[10], append_batch_size=False, dtype='float32' - ) - d2 = layers.data( - "d2", shape=[10], append_batch_size=False, dtype='float32' - ) + d0 = paddle.static.data("d0", shape=[10], dtype='float32') + d1 = paddle.static.data("d1", shape=[10], dtype='float32') + d2 = paddle.static.data("d2", shape=[10], dtype='float32') i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True init = layers.zeros(shape=[10], dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py old mode 100755 new mode 100644 index a9d95fc963ce338dd06787d13ea26514dc9b4855..43b44a07dcbb873cbab2ae6f3b959dd8c33c14ae --- a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py @@ -17,12 +17,10 @@ import unittest import numpy as np import paddle -import paddle.fluid as fluid import paddle.nn.functional as F paddle.set_device('xpu') -fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) - +paddle.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) unary_api_list = [ 
paddle.nn.functional.elu, @@ -149,13 +147,17 @@ class TestReduceAPI(unittest.TestCase): x = paddle.rand([]) x.stop_gradient = False out = api(x, None) + out.backward() self.assertEqual(x.shape, []) self.assertEqual(out.shape, []) + np.testing.assert_allclose(out.numpy(), x.numpy()) if x.grad is not None: self.assertEqual(x.grad.shape, []) self.assertEqual(out.grad.shape, []) + np.testing.assert_allclose(x.grad.numpy(), np.array(1.0)) + np.testing.assert_allclose(out.grad.numpy(), np.array(1.0)) paddle.enable_static() @@ -440,6 +442,36 @@ class TestSundryAPI(unittest.TestCase): self.assertEqual(out.shape, []) self.assertEqual(out.numpy(), 0) + def test_transpose(self): + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.transpose(x, []) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(out, x) + self.assertEqual(out.grad.shape, []) + self.assertEqual(x.grad.shape, []) + self.assertEqual(x.grad, 1.0) + + with self.assertRaises(ValueError): + x = paddle.transpose(x, [0]) + + def test_moveaxis(self): + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.moveaxis(x, [], []) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(out, x) + self.assertEqual(out.grad.shape, []) + self.assertEqual(x.grad.shape, []) + self.assertEqual(x.grad, 1.0) + + with self.assertRaises(AssertionError): + x = paddle.moveaxis(x, [1], [0]) + def test_gather_1D(self): x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False) index = paddle.full([], 2, 'int64') @@ -824,6 +856,19 @@ class TestSundryAPI(unittest.TestCase): self.assertEqual(x.grad.shape, []) self.assertEqual(x.grad.numpy(), 1) + def test_unsqueeze(self): + x1 = paddle.full([], 2) + x1.stop_gradient = False + out1 = paddle.unsqueeze(x1, axis=0) + out1.backward() + self.assertEqual(out1.shape, [1]) + self.assertEqual(x1.grad.shape, []) + + x2 = paddle.full([], 0, dtype='int32') + out2 = paddle.unsqueeze(x1, axis=x2) + out2.backward() + self.assertEqual(out2.shape, [1]) + # Use to test API whose zero-dim input tensors don't have grad and not need to test backward in OpTest. diff --git a/python/paddle/incubate/autograd/primapi.py b/python/paddle/incubate/autograd/primapi.py index 76e0802194272927c6318bba7def02e67314cdfd..476f7125c443ecc9ae0d045be8cade974f634f09 100644 --- a/python/paddle/incubate/autograd/primapi.py +++ b/python/paddle/incubate/autograd/primapi.py @@ -218,7 +218,7 @@ def grad(outputs, inputs, grad_outputs=None): @framework.static_only def to_prim(blocks): """Search nonbasic ops which have be registered composite rules and replace them with primitive ops.""" - if not core.enable_prim_forward(): + if not core._is_fwd_prim_enabled(): return if isinstance(blocks, paddle.fluid.framework.Block): logging.info("Atomize composite op to primitive ops begin.") @@ -235,5 +235,6 @@ def to_prim(blocks): f"Expect block or sequence of blocks, but got {type(blocks)}." 
) with framework.program_guard(main_program): + print("Running lowering for forward...") primx._lower_composite(blocks) return diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py index 293c8b40f7752bffa3819e459a929052780cff0e..19d44b8e35c8ba5d9d6f414eacd4c4a266eac856 100644 --- a/python/paddle/jit/dy2static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -571,13 +571,10 @@ class PartialProgramLayer: targets.append(program.global_block().var(out.name)) if targets: - enable_prim = self._build_strategy.build_cinn_pass - if enable_prim and core.enable_prim_backward(): - core.set_prim_enabled(True) - backward.gradients(targets=targets, inputs=[]) - core.set_prim_enabled(False) - else: - backward.gradients(targets=targets, inputs=[]) + if self._build_strategy.build_cinn_pass: + # TODO(Jiabin): Change this to True if we need this to be default option + core.check_and_set_prim_all_enabled() + backward.gradients(targets=targets, inputs=[]) start_idx = len(main_program.block(0).ops) + 2 * len( self._outputs.tolist() diff --git a/python/paddle/jit/dy2static/program_translator.py b/python/paddle/jit/dy2static/program_translator.py index 5b8493977e904b9fefdb4ae448b8df1abd499e13..5a66cd103a7fe70efb455b2840b09e86fed67f53 100644 --- a/python/paddle/jit/dy2static/program_translator.py +++ b/python/paddle/jit/dy2static/program_translator.py @@ -1092,8 +1092,9 @@ class ProgramCache: def _build_once(self, cache_key): # TODO(Aurelius84): Need a gloabl FLAGS to enable/disable to_prim enable_prim = cache_key.kwargs['build_strategy'].build_cinn_pass - if enable_prim and core.enable_prim_backward(): - core.set_prim_enabled(True) + if enable_prim: + # TODO(Jiabin): Change this to True if we need this to be default option + core.check_and_set_prim_all_enabled() concrete_program = ConcreteProgram.from_func_spec( func_spec=cache_key.function_spec, @@ -1103,9 +1104,7 @@ class ProgramCache: **cache_key.kwargs ) - if enable_prim or core.enable_prim_forward() == "debug": - concrete_program._to_prim() - core.set_prim_enabled(False) + concrete_program._to_prim() return concrete_program, partial_program_from(concrete_program) def __getitem__(self, item): diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index 070efdff2d126f40d79f31a5a39e9942b39b604e..9c827496e8b2e350c3109c27baecfd21408b5cb0 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -360,8 +360,6 @@ class Adam(Optimizer): # create the adam optimize op if framework.in_dygraph_mode(): - found_inf = self._get_auxiliary_var('found_inf') - _beta1 = ( self._beta1 if not isinstance(self._beta1, Variable) @@ -382,7 +380,7 @@ class Adam(Optimizer): beta1_pow_acc, beta2_pow_acc, master_weight, - found_inf, + None, _beta1, _beta2, self._epsilon, @@ -693,21 +691,28 @@ class Adam(Optimizer): if master_weight is not None else None ) - _, _, _, _, _, _ = _C_ops.merged_adam_( - self._param_dict[key][param_group_idx], - grad_dict[key], - lr_dict[key], - self._moment1_dict[key][param_group_idx], - self._moment2_dict[key][param_group_idx], - self._beta1_pow_acc_dict[key][param_group_idx], - self._beta2_pow_acc_dict[key][param_group_idx], - master_weight, - _beta1, - _beta2, - self._epsilon, - find_master, - False, - ) + found_inf = self._get_auxiliary_var('found_inf') + if found_inf: + if isinstance(found_inf, core.eager.Tensor): + self._set_auxiliary_var('found_inf', True) + else: + if isinstance(found_inf, core.eager.Tensor): + 
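# The optimizer hunks here and below share one dygraph pattern for AMP overflow
# handling: the fused kernels (adam_, adamw_, lamb_, merged_adam_,
# merged_momentum_) no longer receive found_inf directly (None is passed), and
# the Python side decides whether to skip the parameter update. A condensed,
# hypothetical helper that mirrors that guard is sketched below; the real code
# inlines this logic right before the fused kernel call.
import paddle
from paddle.fluid import core

def should_skip_update(optimizer):
    """Return True when the GradScaler has flagged an inf/nan this step.

    Hypothetical helper for illustration only; `optimizer` is assumed to
    expose _get_auxiliary_var / _set_auxiliary_var as in this file.
    """
    found_inf = optimizer._get_auxiliary_var('found_inf')
    if found_inf:
        # overflow detected: record it on the auxiliary flag and skip the step
        if isinstance(found_inf, core.eager.Tensor):
            optimizer._set_auxiliary_var('found_inf', True)
        return True
    # no overflow: clear the flag and let the fused update kernel run
    if isinstance(found_inf, core.eager.Tensor):
        optimizer._set_auxiliary_var('found_inf', False)
    return False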
self._set_auxiliary_var('found_inf', False) + _, _, _, _, _, _ = _C_ops.merged_adam_( + self._param_dict[key][param_group_idx], + grad_dict[key], + lr_dict[key], + self._moment1_dict[key][param_group_idx], + self._moment2_dict[key][param_group_idx], + self._beta1_pow_acc_dict[key][param_group_idx], + self._beta2_pow_acc_dict[key][param_group_idx], + master_weight, + _beta1, + _beta2, + self._epsilon, + find_master, + False, + ) else: inputs = { "Param": self._param_dict[key][param_group_idx], diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index a4d304b451e7b3cad3fdab97bf05e7854146a260..5a75e6d243696a4407910ef650a854f98e51aba8 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -491,7 +491,6 @@ class AdamW(Optimizer): else self._beta2.numpy().item(0) ) - found_inf = self._get_auxiliary_var('found_inf') _, _, _, _, _, _ = _C_ops.adamw_( param_and_grad[0], param_and_grad[1], @@ -501,7 +500,7 @@ class AdamW(Optimizer): beta1_pow_acc, beta2_pow_acc, master_weight, - found_inf, + None, _beta1, _beta2, self._epsilon, diff --git a/python/paddle/optimizer/lamb.py b/python/paddle/optimizer/lamb.py index e531e785e319fbe4e328fbef84c5426d05974a09..57904cd44a86cccfade4a6950a86b6eccd8c4af5 100644 --- a/python/paddle/optimizer/lamb.py +++ b/python/paddle/optimizer/lamb.py @@ -293,7 +293,6 @@ class Lamb(Optimizer): self._used_master_weights[p_name] = master_weight.name else: master_weight = None - found_inf = self._get_auxiliary_var('found_inf') if framework.in_dygraph_mode(): _C_ops.lamb_( @@ -305,7 +304,7 @@ class Lamb(Optimizer): beta1_pow_acc, beta2_pow_acc, master_weight, - found_inf, + None, weight_decay, self._beta1, self._beta2, @@ -343,6 +342,7 @@ class Lamb(Optimizer): inputs["MasterParam"] = master_weight outputs["MasterParamOut"] = master_weight + found_inf = self._get_auxiliary_var('found_inf') if found_inf: inputs["SkipUpdate"] = found_inf diff --git a/python/paddle/optimizer/momentum.py b/python/paddle/optimizer/momentum.py index 3b20777599fb01e9cb1e5576a4a26d07a2540770..bff9c1209e70806f19b24f7cb1a6fc922c4e541c 100644 --- a/python/paddle/optimizer/momentum.py +++ b/python/paddle/optimizer/momentum.py @@ -530,19 +530,30 @@ class Momentum(Optimizer): ) if in_dygraph_mode(): - _, _, _ = _C_ops.merged_momentum_( - self._param_dict[key][param_group_idx], - grad_dict[key], - self._velocity_dict[key][param_group_idx], - lr_dict[key], - master_weight, - self._momentum, - self._use_nesterov, - self._regularization_method_dict[key][param_group_idx], - self._regularization_coeff_dict[key][param_group_idx], - find_master, - self._rescale_grad, - ) + found_inf = self._get_auxiliary_var('found_inf') + if found_inf: + if isinstance(found_inf, core.eager.Tensor): + self._set_auxiliary_var('found_inf', True) + else: + if isinstance(found_inf, core.eager.Tensor): + self._set_auxiliary_var('found_inf', False) + _, _, _ = _C_ops.merged_momentum_( + self._param_dict[key][param_group_idx], + grad_dict[key], + self._velocity_dict[key][param_group_idx], + lr_dict[key], + master_weight, + self._momentum, + self._use_nesterov, + self._regularization_method_dict[key][ + param_group_idx + ], + self._regularization_coeff_dict[key][ + param_group_idx + ], + find_master, + self._rescale_grad, + ) else: inputs = { "Param": self._param_dict[key][param_group_idx], diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index 1799461254ced546eb35ac119d0cf893169c854e..cad226952be41807801d2e651f725e23879926d8 100644 --- 
a/python/paddle/optimizer/optimizer.py
+++ b/python/paddle/optimizer/optimizer.py
@@ -920,31 +920,38 @@ class Optimizer:
             self._create_accumulators(target_block, params_acc_dict)
 
         if framework._non_static_mode():
-            if isinstance(parameters_and_grads, list):
-                for param_and_grad in parameters_and_grads:
-                    if param_and_grad[1] is None:
-                        continue
-                    if param_and_grad[0].stop_gradient is False:
-                        self._append_optimize_op(
-                            target_block, param_and_grad
-                        )
+            found_inf = self._get_auxiliary_var('found_inf')
+            if found_inf:
+                if isinstance(found_inf, core.eager.Tensor):
+                    self._set_auxiliary_var('found_inf', True)
             else:
-                for param_and_grad in parameters_and_grads['params']:
-                    if param_and_grad[1] is None:
-                        continue
-                    if param_and_grad[0].stop_gradient is False:
-                        param_grad_dict = dict()
-                        param_grad_dict['params'] = param_and_grad
-                        param_grad_dict.update(
-                            {
-                                k: v
-                                for k, v in parameters_and_grads.items()
-                                if k != 'params'
-                            }
-                        )
-                        self._append_optimize_op(
-                            target_block, param_grad_dict
-                        )
+                if isinstance(found_inf, core.eager.Tensor):
+                    self._set_auxiliary_var('found_inf', False)
+                if isinstance(parameters_and_grads, list):
+                    for param_and_grad in parameters_and_grads:
+                        if param_and_grad[1] is None:
+                            continue
+                        if param_and_grad[0].stop_gradient is False:
+                            self._append_optimize_op(
+                                target_block, param_and_grad
+                            )
+                else:
+                    for param_and_grad in parameters_and_grads['params']:
+                        if param_and_grad[1] is None:
+                            continue
+                        if param_and_grad[0].stop_gradient is False:
+                            param_grad_dict = dict()
+                            param_grad_dict['params'] = param_and_grad
+                            param_grad_dict.update(
+                                {
+                                    k: v
+                                    for k, v in parameters_and_grads.items()
+                                    if k != 'params'
+                                }
+                            )
+                            self._append_optimize_op(
+                                target_block, param_grad_dict
+                            )
         else:
             for param_and_grad in parameters_and_grads:
                 if param_and_grad[1] is None:
diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py
index 0c74a900e1b72af82096e800666c892a15a7f513..bdd903ee8f1966179301bccab04e3249b09ae299 100644
--- a/python/paddle/tensor/manipulation.py
+++ b/python/paddle/tensor/manipulation.py
@@ -14,8 +14,6 @@
 
 # TODO: define functions to manipulate a tensor
 
-from collections import Counter
-
 import numpy as np
 
 import paddle
@@ -4335,11 +4333,9 @@ def moveaxis(x, source, destination, name=None):
         dst
     ), "'source' must have the same number with 'destination'"
 
-    count = Counter(src).most_common(1)
-    if count[0][1] > 1:
+    if len(src) != len(set(src)):
         raise ValueError("Each elemment of 'source' must be unique!")
-    count = Counter(dst).most_common(1)
-    if count[0][1] > 1:
+    if len(dst) != len(set(dst)):
         raise ValueError("Each elemment of 'destination' must be unique!")
 
     ndim = len(x.shape)
diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py
index e152c2a366072de90d1d6ce09e32c60e71ebdda0..e23f28aa76b1b365fe496dfcc54338f40b05500c 100644
--- a/python/paddle/tensor/stat.py
+++ b/python/paddle/tensor/stat.py
@@ -158,6 +158,7 @@ def var(x, axis=None, unbiased=True, keepdim=False, name=None):
     if unbiased:
         one_const = paddle.ones([], x.dtype)
         n = where(n > one_const, n - 1.0, one_const)
+        n.stop_gradient = True
     out /= n
     return out
 
@@ -500,8 +501,6 @@ def _compute_quantile(x, q, axis=None, keepdim=False, ignore_nan=False):
         out_shape = [1] * dims
     else:
         if isinstance(axis, list):
-            if len(axis) <= 0:
-                raise ValueError("axis should not be empty")
             axis_src, axis_dst = [], []
             for axis_single in axis:
                 if not isinstance(axis_single, int) or not (
@@ -514,10 +513,15 @@ def _compute_quantile(x, q, axis=None, keepdim=False, ignore_nan=False):
                     axis_single = axis_single + dims
                 axis_src.append(axis_single)
                 out_shape[axis_single] = 1
+            axis_dst = list(range(-len(axis), 0))
             x = paddle.moveaxis(x, axis_src, axis_dst)
-            x = paddle.flatten(x, axis_dst[0], axis_dst[-1])
-            axis = axis_dst[0]
+            if len(axis_dst) == 0:
+                x = paddle.flatten(x)
+                axis = 0
+            else:
+                x = paddle.flatten(x, axis_dst[0], axis_dst[-1])
+                axis = axis_dst[0]
         else:
             if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
                 raise ValueError(
diff --git a/python/setup.py.in b/python/setup.py.in
index 71a1cfb748198aaf6a5b3bfa05ba2c0af19e5bf5..aee0e69a06d6c43b380007f2ce649ffb1ee3e5ba 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -75,6 +75,18 @@ def get_cudnn_version():
     else:
         return 'False'
 
+def get_xpu_version():
+    if '@WITH_XPU@' == 'ON':
+        return '@XPU_BASE_DATE@'
+    else:
+        return 'False'
+
+def get_xpu_xccl_version():
+    if '@WITH_XPU_BKCL@' == 'ON':
+        return '@XPU_XCCL_BASE_VERSION@'
+    else:
+        return 'False'
+
 def is_taged():
     try:
         cmd = ['git', 'describe', '--exact-match', '--tags', 'HEAD', '2>/dev/null']
@@ -91,18 +103,20 @@ def is_taged():
 def write_version_py(filename='paddle/version/__init__.py'):
     cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY
 #
-full_version = '%(major)d.%(minor)d.%(patch)s'
-major = '%(major)d'
-minor = '%(minor)d'
-patch = '%(patch)s'
-rc = '%(rc)d'
-cuda_version = '%(cuda)s'
-cudnn_version = '%(cudnn)s'
-istaged = %(istaged)s
-commit = '%(commit)s'
-with_mkl = '%(with_mkl)s'
-
-__all__ = ['cuda', 'cudnn', 'show']
+full_version = '%(major)d.%(minor)d.%(patch)s'
+major = '%(major)d'
+minor = '%(minor)d'
+patch = '%(patch)s'
+rc = '%(rc)d'
+cuda_version = '%(cuda)s'
+cudnn_version = '%(cudnn)s'
+xpu_version = '%(xpu)s'
+xpu_xccl_version = '%(xpu_xccl)s'
+istaged = %(istaged)s
+commit = '%(commit)s'
+with_mkl = '%(with_mkl)s'
+
+__all__ = ['cuda', 'cudnn', 'show', 'xpu', 'xpu_xccl']
 
 def show():
     """Get the version of paddle if `paddle` package if tagged. Otherwise, output the corresponding commit id.
@@ -125,6 +139,10 @@ def show():
 
             cudnn: the cudnn version of package. It will return `False` if CPU version paddle package is installed
 
+            xpu: the xpu version of package. It will return `False` if non-XPU version paddle package is installed
+
+            xpu_xccl: the xpu xccl version of package. It will return `False` if non-XPU version paddle package is installed
+
     Examples:
         .. code-block:: python
 
@@ -139,12 +157,16 @@ def show():
             # rc: 0
             # cuda: '10.2'
             # cudnn: '7.6.5'
+            # xpu: '20230114'
+            # xpu_xccl: '1.0.7'
 
             # Case 2: paddle is not tagged
             paddle.version.show()
             # commit: cfa357e984bfd2ffa16820e354020529df434f7d
             # cuda: '10.2'
             # cudnn: '7.6.5'
+            # xpu: '20230114'
+            # xpu_xccl: '1.0.7'
     """
     if istaged:
         print('full_version:', full_version)
@@ -156,6 +178,8 @@ def show():
         print('commit:', commit)
         print('cuda:', cuda_version)
         print('cudnn:', cudnn_version)
+        print('xpu:', xpu_version)
+        print('xpu_xccl:', xpu_xccl_version)
 
 def mkl():
     return with_mkl
@@ -165,7 +189,7 @@ def cuda():
 
     Returns:
         string: Return the version information of cuda. If paddle package is CPU version, it will return False.
-    
+
     Examples:
         .. code-block:: python
 
@@ -182,7 +206,7 @@ def cudnn():
 
     Returns:
         string: Return the version information of cudnn. If paddle package is CPU version, it will return False.
-    
+
     Examples:
         .. code-block:: python
 
@@ -193,6 +217,40 @@ def cudnn():
 
     """
     return cudnn_version
+
+def xpu():
+    """Get xpu version of paddle package.
+
+    Returns:
+        string: Return the version information of xpu. If paddle package is non-XPU version, it will return False.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            paddle.version.xpu()
+            # '20230114'
+
+    """
+    return xpu_version
+
+def xpu_xccl():
+    """Get xpu xccl version of paddle package.
+
+    Returns:
+        string: Return the version information of xpu xccl. If paddle package is non-XPU version, it will return False.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            paddle.version.xpu_xccl()
+            # '1.0.7'
+
+    """
+    return xpu_xccl_version
 '''
 
 commit = git_commit()
@@ -213,6 +271,8 @@ def cudnn():
             'version': '${PADDLE_VERSION}',
             'cuda': get_cuda_version(),
             'cudnn': get_cudnn_version(),
+            'xpu': get_xpu_version(),
+            'xpu_xccl': get_xpu_xccl_version(),
             'commit': commit,
             'istaged': is_taged(),
             'with_mkl': '@WITH_MKL@'})
@@ -463,7 +523,7 @@
 shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
 shutil.copy('${WARPRNNT_LIBRARIES}', libs_path)
 package_data['paddle.libs']+=[
-    os.path.basename('${LAPACK_LIB}'), 
+    os.path.basename('${LAPACK_LIB}'),
     os.path.basename('${BLAS_LIB}'),
     os.path.basename('${GFORTRAN_LIB}'),
     os.path.basename('${GNU_RT_LIB_1}')]
diff --git a/setup.py b/setup.py
index 90be1f9bb254fadf820426a2337ddcdd197308b1..8737de4e36a6cf0f0246e1641408c2d190cbda99 100644
--- a/setup.py
+++ b/setup.py
@@ -345,6 +345,22 @@ def get_cudnn_version():
         return 'False'
 
 
+def get_xpu_version():
+    with_xpu = env_dict.get("WITH_XPU")
+    if with_xpu == 'ON':
+        return env_dict.get("XPU_BASE_DATE")
+    else:
+        return 'False'
+
+
+def get_xpu_xccl_version():
+    with_xpu_xccl = env_dict.get("WITH_XPU_BKCL")
+    if with_xpu_xccl == 'ON':
+        return env_dict.get("XPU_XCCL_BASE_VERSION")
+    else:
+        return 'False'
+
+
 def is_taged():
     try:
         cmd = [
@@ -376,18 +392,20 @@ def is_taged():
 def write_version_py(filename='paddle/version/__init__.py'):
     cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY
 #
-full_version = '%(major)d.%(minor)d.%(patch)s'
-major = '%(major)d'
-minor = '%(minor)d'
-patch = '%(patch)s'
-rc = '%(rc)d'
-cuda_version = '%(cuda)s'
-cudnn_version = '%(cudnn)s'
-istaged = %(istaged)s
-commit = '%(commit)s'
-with_mkl = '%(with_mkl)s'
-
-__all__ = ['cuda', 'cudnn', 'show']
+full_version = '%(major)d.%(minor)d.%(patch)s'
+major = '%(major)d'
+minor = '%(minor)d'
+patch = '%(patch)s'
+rc = '%(rc)d'
+cuda_version = '%(cuda)s'
+cudnn_version = '%(cudnn)s'
+xpu_version = '%(xpu)s'
+xpu_xccl_version = '%(xpu_xccl)s'
+istaged = %(istaged)s
+commit = '%(commit)s'
+with_mkl = '%(with_mkl)s'
+
+__all__ = ['cuda', 'cudnn', 'show', 'xpu', 'xpu_xccl']
 
 def show():
     """Get the version of paddle if `paddle` package if tagged. Otherwise, output the corresponding commit id.
@@ -410,6 +428,10 @@ def show():
 
             cudnn: the cudnn version of package. It will return `False` if CPU version paddle package is installed
 
+            xpu: the xpu version of package. It will return `False` if non-XPU version paddle package is installed
+
+            xpu_xccl: the xpu xccl version of package. It will return `False` if non-XPU version paddle package is installed
+
     Examples:
         .. code-block:: python
 
@@ -424,12 +446,16 @@ def show():
             # rc: 0
             # cuda: '10.2'
             # cudnn: '7.6.5'
+            # xpu: '20230114'
+            # xpu_xccl: '1.0.7'
 
             # Case 2: paddle is not tagged
             paddle.version.show()
             # commit: cfa357e984bfd2ffa16820e354020529df434f7d
             # cuda: '10.2'
             # cudnn: '7.6.5'
+            # xpu: '20230114'
+            # xpu_xccl: '1.0.7'
     """
     if istaged:
         print('full_version:', full_version)
@@ -441,6 +467,8 @@ def show():
         print('commit:', commit)
         print('cuda:', cuda_version)
         print('cudnn:', cudnn_version)
+        print('xpu:', xpu_version)
+        print('xpu_xccl:', xpu_xccl_version)
 
 def mkl():
     return with_mkl
@@ -478,6 +506,40 @@ def cudnn():
 
     """
     return cudnn_version
+
+def xpu():
+    """Get xpu version of paddle package.
+
+    Returns:
+        string: Return the version information of xpu. If paddle package is non-XPU version, it will return False.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            paddle.version.xpu()
+            # '20230114'
+
+    """
+    return xpu_version
+
+def xpu_xccl():
+    """Get xpu xccl version of paddle package.
+
+    Returns:
+        string: Return the version information of xpu xccl. If paddle package is non-XPU version, it will return False.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            paddle.version.xpu_xccl()
+            # '1.0.7'
+
+    """
+    return xpu_xccl_version
 '''
 
 commit = git_commit()
@@ -500,6 +562,8 @@ def cudnn():
                 'version': env_dict.get("PADDLE_VERSION"),
                 'cuda': get_cuda_version(),
                 'cudnn': get_cudnn_version(),
+                'xpu': get_xpu_version(),
+                'xpu_xccl': get_xpu_xccl_version(),
                 'commit': commit,
                 'istaged': is_taged(),
                 'with_mkl': env_dict.get("WITH_MKL"),
diff --git a/tools/codestyle/test_docstring_checker.py b/tools/codestyle/test_docstring_checker.py
index 8e18fd07030a8d9f0543cb8b44dc089fc55fe19d..ddc0ed185db4c6897371331e4c69b5a2637ec078 100644
--- a/tools/codestyle/test_docstring_checker.py
+++ b/tools/codestyle/test_docstring_checker.py
@@ -218,7 +218,7 @@ def fc(input,
         ValueError: If rank of the input tensor is less than 2.
     Examples:
         .. code-block:: python
-          data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
+          data = paddle.static.data(name="data", shape=[-1, 32, 32], dtype="float32")
           fc = paddle.static.nn.fc(x=data, size=1000, activation="tanh")
     """
     raise ValueError('A very specific bad thing happened.')
diff --git a/tools/infrt/fake_models/multi_fc.py b/tools/infrt/fake_models/multi_fc.py
index ded9f67bd34db7bbe79e1ed84c96ca86bf55266a..8e3987a35219dda1eb074d30652b99dbabe1fa58 100644
--- a/tools/infrt/fake_models/multi_fc.py
+++ b/tools/infrt/fake_models/multi_fc.py
@@ -21,8 +21,8 @@ size = 2
 num_layers = 4
 paddle.enable_static()
 
-a = fluid.layers.data(name="A", shape=[-1, size], dtype='float32')
-label = fluid.layers.data(name="label", shape=[size], dtype='float32')
+a = paddle.static.data(name="A", shape=[-1, size], dtype='float32')
+label = paddle.static.data(name="label", shape=[-1, size], dtype='float32')
 
 fc_out = paddle.static.nn.fc(
     x=a,