diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 2e189e4865f4d820d09d5b135cadbb31f9500279..d4d5f4903f8639fcecd1a0c986e036c4eacd8aaf 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -438,6 +438,7 @@ if(WITH_XPU) SRCS operator.cc DEPS xpu_op_list op_info + proto_desc device_context tensor scope @@ -462,6 +463,7 @@ else() operator SRCS operator.cc DEPS op_info + proto_desc device_context tensor scope diff --git a/paddle/fluid/framework/attribute.cc b/paddle/fluid/framework/attribute.cc index 13f175ce0b1cd94bd550fdac16cec652a29c5e50..dd456b147ac8d67f262a8e9708023ada77aa4978 100644 --- a/paddle/fluid/framework/attribute.cc +++ b/paddle/fluid/framework/attribute.cc @@ -42,6 +42,10 @@ paddle::any GetAttrValue(const Attribute& attr) { return PADDLE_GET_CONST(std::vector, attr); case proto::AttrType::FLOAT64S: return PADDLE_GET_CONST(std::vector, attr); + case proto::AttrType::VAR: + return PADDLE_GET_CONST(VarDesc*, attr); + case proto::AttrType::VARS: + return PADDLE_GET_CONST(std::vector, attr); case proto::AttrType::BLOCK: return PADDLE_GET_CONST(BlockDesc*, attr); case proto::AttrType::BLOCKS: diff --git a/paddle/fluid/framework/attribute.h b/paddle/fluid/framework/attribute.h index a149c18f542e2542cb0535a132f3003881a7a0b6..4d3ba2a1820be55807d9d2122b9a75f8b828da21 100644 --- a/paddle/fluid/framework/attribute.h +++ b/paddle/fluid/framework/attribute.h @@ -221,6 +221,28 @@ inline proto::AttrType AttrTypeID(const Attribute& attr) { return static_cast(attr.index() - 1); } +inline bool IsAttrVar(const Attribute& attr) { + return AttrTypeID(attr) == proto::AttrType::VAR; +} + +inline bool IsAttrVars(const Attribute& attr) { + return AttrTypeID(attr) == proto::AttrType::VARS; +} + +inline bool HasAttrVar(const Attribute& attr) { + return IsAttrVar(attr) || IsAttrVars(attr); +} + +inline AttributeMap FilterAttrVar(const AttributeMap& attrs) { + AttributeMap attrs_var; + for (auto& attr : attrs) { + if (HasAttrVar(attr.second)) { + attrs_var.emplace(attr); + } + } + return attrs_var; +} + class AttrReader { public: explicit AttrReader(const AttributeMap& attrs) @@ -414,9 +436,15 @@ class TypedAttrChecker { } return; } + // If attribute is VarDesc(s), we should verify it's dtype and shape. 
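Editorial note: the helpers added above (IsAttrVar, IsAttrVars, HasAttrVar, FilterAttrVar) and the early return in TypedAttrChecker are what allow an operator attribute to be bound to a Variable whose value only exists at runtime; the compile-time checker skips such attributes instead of rejecting them. For orientation, this is the user-level behaviour the change enables, a minimal sketch mirroring the new test_attribute_var.py test near the end of this patch (requires a build that includes this change):

```python
import paddle

paddle.enable_static()

main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.randn([10, 10])
    # The dropout probability lives in a Variable, so it is recorded as a
    # VAR-typed attribute on the dropout op instead of a plain float.
    p = paddle.assign([0.5])
    out = paddle.nn.functional.dropout(x, p=p)

exe = paddle.static.Executor()
exe.run(startup_prog)
res, = exe.run(main_prog, fetch_list=[out])
print(res.shape)  # (10, 10)
```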
+ auto it = attr_map->find(attr_name_); + if (it != attr_map->end() && HasAttrVar(it->second)) { + VLOG(1) << "Found Attribute " << attr_name_ + << " with Variable, skip attr_checker."; + return; + } if (only_check_exist_value) { - auto it = attr_map->find(attr_name_); if (it != attr_map->end()) { ExtractAttribute extract_attr(attr_name_); T* attr_value = extract_attr(it->second); @@ -425,7 +453,6 @@ class TypedAttrChecker { } } } else { - auto it = attr_map->find(attr_name_); if (it == attr_map->end()) { // user do not set this attr PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/framework/attribute_test.cc b/paddle/fluid/framework/attribute_test.cc index 8a47e41d38359ca6b0de53db9629ec1b0b8e8ceb..ab6f71926b80f510fdefb16096b976a08d0252db 100644 --- a/paddle/fluid/framework/attribute_test.cc +++ b/paddle/fluid/framework/attribute_test.cc @@ -19,6 +19,7 @@ #include "gtest/gtest.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/var_desc.h" #include "paddle/utils/any.h" TEST(Attribute, GetAttrValueToAny) { @@ -72,6 +73,25 @@ TEST(Attribute, GetAttrValueToAny) { EXPECT_EQ(vec_bool[0], true); EXPECT_EQ(vec_bool[1], true); + paddle::framework::VarDesc var_desc("axis"); + paddle::framework::Attribute var_attr(&var_desc); + auto rlt_var_attr = paddle::framework::GetAttrValue(var_attr); + auto var_desc_ptr = + paddle::any_cast(rlt_var_attr); + EXPECT_NE(var_desc_ptr, nullptr); + EXPECT_EQ(var_desc_ptr->Name(), var_desc.Name()); + + paddle::framework::VarDesc var2_desc("prob"); + std::vector vars_desc{&var_desc, &var2_desc}; + paddle::framework::Attribute vars_attr(vars_desc); + + auto rlt_vars_attr = paddle::framework::GetAttrValue(vars_attr); + auto rlt_vars_desc = + paddle::any_cast>(rlt_vars_attr); + EXPECT_EQ(rlt_vars_desc.size(), vars_desc.size()); + EXPECT_EQ(rlt_vars_desc[0]->Name(), vars_desc[0]->Name()); + EXPECT_EQ(rlt_vars_desc[1]->Name(), vars_desc[1]->Name()); + paddle::framework::ProgramDesc prog; paddle::framework::proto::BlockDesc proto_block; paddle::framework::BlockDesc block_desc(&prog, &proto_block); diff --git a/paddle/fluid/framework/block_desc.cc b/paddle/fluid/framework/block_desc.cc index 84d52c996d0562e6feadfc573c61ec77cf2703f9..d9dd10f7165374dbe64c4eb66bd162c36a1ab99d 100644 --- a/paddle/fluid/framework/block_desc.cc +++ b/paddle/fluid/framework/block_desc.cc @@ -217,13 +217,15 @@ BlockDesc::BlockDesc(const BlockDesc &other, ProgramDesc *prog) : prog_(prog), desc_(desc) { need_update_ = true; - for (auto &op : other.ops_) { - ops_.emplace_back(new OpDesc(*op, this)); - } + // NOTE(dev): Init vars_ firstly so we can find them + // while constructing OpDesc. 
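Editorial note: initializing vars_ before ops_ in this copy constructor matters because each copied OpDesc re-binds its VAR/VARS attributes to the new block's own VarDescs (see OpDesc::UpdateVarAttr further down), which requires those variables to already exist. A sketch of the observable effect, assuming the patched build; Program.clone() goes through exactly this ProgramDesc/BlockDesc copy path:

```python
import paddle

paddle.enable_static()

prog = paddle.static.Program()
with paddle.static.program_guard(prog):
    x = paddle.randn([4, 4])
    out = paddle.nn.functional.dropout(x, p=paddle.assign([0.3]))

# clone() copies the underlying ProgramDesc/BlockDesc, so the VAR attribute
# is re-bound to a VarDesc owned by the cloned block rather than left dangling.
cloned = prog.clone()
print("Var[" in str(cloned))  # True
```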
for (auto &it : other.vars_) { auto *var = new VarDesc(*it.second); vars_[it.first].reset(var); } + for (auto &op : other.ops_) { + ops_.emplace_back(new OpDesc(*op, this)); + } } void BlockDesc::SetForwardBlockID(int32_t forward_block_id) { @@ -273,7 +275,10 @@ void BlockDesc::MoveFrom(BlockDesc *block) { const auto &attr_name = pair.first; const auto &attr_value = pair.second; auto attr_type = static_cast(attr_value.index() - 1); - if (attr_type == proto::AttrType::BLOCK) { + if (attr_type == proto::AttrType::VAR || + attr_type == proto::AttrType::VARS) { + dst_op->UpdateVarAttr(attr_name, attr_value); + } else if (attr_type == proto::AttrType::BLOCK) { auto block_id = PADDLE_GET_CONST(BlockDesc *, attr_value)->ID(); dst_op->SetBlockAttr(attr_name, prog_->MutableBlock(block_id)); VLOG(10) << "Set block attr " << attr_name << " id " << block_id; diff --git a/paddle/fluid/framework/framework.proto b/paddle/fluid/framework/framework.proto index 0d3e7c2741c17b2649c7e73a3c97d3f117c6027f..391197d967abe29784eeccd162baa4003b47b2ae 100644 --- a/paddle/fluid/framework/framework.proto +++ b/paddle/fluid/framework/framework.proto @@ -36,6 +36,8 @@ enum AttrType { BLOCKS = 10; LONGS = 11; FLOAT64S = 12; + VAR = 13; + VARS = 14; } message ProcessMeshDesc { @@ -65,6 +67,8 @@ message OpDesc { repeated int32 blocks_idx = 14; repeated int64 longs = 15; repeated double float64s = 16; + optional string var_name = 17; + repeated string vars_name = 18; }; message Var { diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc index c525888ca116c9cd164123d758395eb3c2632c7a..eb988d59a2a8b51b1480184c8990e18f1ff52967 100644 --- a/paddle/fluid/framework/infershape_utils.cc +++ b/paddle/fluid/framework/infershape_utils.cc @@ -451,12 +451,13 @@ CompatInferMetaContext BuildInferMetaContext(InferShapeContext* ctx, auto attr_reader = ctx->Attrs(); for (size_t i = 0; i < attr_names.size(); ++i) { auto& attr_name = attr_names[i]; - VLOG(6) << "BuildInferMetaContext: " << attr_name << ": " - << attr_defs[i].type_index; auto* attr_ptr = attr_reader.GetAttr(attr_name); + bool is_attr_var = attr_ptr != nullptr && HasAttrVar(*attr_ptr); + VLOG(6) << "BuildInferMetaContext: " << attr_name << ": " + << attr_defs[i].type_index << ", is_attr_var: " << is_attr_var; switch (attr_defs[i].type_index) { case phi::AttributeType::SCALAR: - if (attr_ptr) { + if (attr_ptr && !is_attr_var) { auto& attr = *attr_ptr; switch (AttrTypeID(attr)) { case framework::proto::AttrType::FLOAT: @@ -502,7 +503,7 @@ CompatInferMetaContext BuildInferMetaContext(InferShapeContext* ctx, break; case phi::AttributeType::INT_ARRAY: // When attr is a vector_tensor or tensor, transform it to IntArray - if (attr_ptr) { + if (attr_ptr && !is_attr_var) { auto& attr = *attr_ptr; switch (AttrTypeID(attr)) { case framework::proto::AttrType::INTS: diff --git a/paddle/fluid/framework/ir/graph.cc b/paddle/fluid/framework/ir/graph.cc index fceed0fc44e5f6baa274254ad4307b78cac0fb12..6946fb6d7d9eee9ed63f55682ddb1e1bc80283b9 100644 --- a/paddle/fluid/framework/ir/graph.cc +++ b/paddle/fluid/framework/ir/graph.cc @@ -149,7 +149,7 @@ std::map> Graph::InitFromBlock( ++desc_order; // For input args, reuse the same var name if it was created before. // Otherwise, create a new one. 
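Editorial note: treating VAR/VARS attributes as inputs when building the IR graph keeps dependency edges correct, and the new proto fields (var_name / vars_name) give them a serialized form. From Python this surfaces through the with_attr_var-aware input queries and the op's string form, using the pybind bindings added later in this diff. A sketch, assuming the patched build; the dropout_prob attribute name comes from the legacy dropout op:

```python
import paddle

paddle.enable_static()

prog = paddle.static.Program()
with paddle.static.program_guard(prog):
    x = paddle.randn([8, 8])
    p = paddle.assign([0.5])
    out = paddle.nn.functional.dropout(x, p=p)

drop_op = [op for op in prog.global_block().ops if op.type == 'dropout'][0]
# With with_attr_var=True the probability Variable is reported next to the
# regular inputs; without it the op looks exactly as it did before this patch.
print(p.name in drop_op.desc.input_arg_names(True))   # True
print(p.name in drop_op.desc.input_arg_names())       # False
# Operator._to_string() renders the attribute as Var['<name>'].
print("Var['%s']" % p.name in str(prog))              # True
```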
- for (auto &each_var_name : op->InputArgumentNames()) { + for (auto &each_var_name : op->InputArgumentNames(true)) { not_visited_vars.erase(each_var_name); ir::Node *var = nullptr; if (var_nodes.find(each_var_name) != var_nodes.end()) { diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 52ac86d060694076f1906d5d78e7cd23bcde8cae..cc7923602647fc527c9fd5fa25333b544dd2d931 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -425,6 +425,9 @@ OpDesc::OpDesc(const OpDesc &other, BlockDesc *block) { CopyFrom(other); block_ = block; need_update_ = true; + for (auto &iter : attrs_) { + UpdateVarAttr(iter.first, iter.second); + } } void OpDesc::CopyFrom(const OpDesc &op_desc) { @@ -465,9 +468,13 @@ OpDesc::OpDesc(const proto::OpDesc &desc, BlockDesc *block) for (const proto::OpDesc::Attr &attr : desc_.attrs()) { std::string attr_name = attr.name(); // The sub_block referred to by the BLOCK attr hasn't been added - // to ProgramDesc class yet, we skip setting BLOCK/BLOCKS attr here. - if (attr.type() != proto::AttrType::BLOCK && - attr.type() != proto::AttrType::BLOCKS) { + // to ProgramDesc class yet, we skip setting BLOCK/BLOCKS/VAR/VARS attr + // here. + auto attr_type = attr.type(); + if (attr_type != proto::AttrType::BLOCK && + attr_type != proto::AttrType::BLOCKS && + attr_type != proto::AttrType::VAR && + attr_type != proto::AttrType::VARS) { attrs_[attr_name] = GetAttrValue(attr); } } @@ -489,9 +496,31 @@ const std::vector &OpDesc::Input(const std::string &name) const { return it->second; } -std::vector OpDesc::InputArgumentNames() const { +std::vector OpDesc::Input(const std::string &name, + bool with_attr_var) const { + // Attribute with VarDesc type will consider as Input + if (with_attr_var) { + auto it = attrs_.find(name); + if (it != attrs_.end() && HasAttrVar(it->second)) + return AttrVarNames(it->second); + } + return this->Input(name); +} + +VariableNameMap OpDesc::Inputs(bool with_attr_var) const { + if (!with_attr_var) { + return inputs_; + } + VariableNameMap res = inputs_; + for (auto &attr : FilterAttrVar(attrs_)) { + res[attr.first] = AttrVarNames(attr.second); + } + return res; +} + +std::vector OpDesc::InputArgumentNames(bool with_attr_var) const { std::vector retv; - for (auto &ipt : this->inputs_) { + for (auto &ipt : this->Inputs(with_attr_var)) { retv.insert(retv.end(), ipt.second.begin(), ipt.second.end()); } return retv; @@ -558,24 +587,31 @@ bool OpDesc::HasProtoAttr(const std::string &name) const { return false; } -proto::AttrType OpDesc::GetAttrType(const std::string &name) const { - auto it = attrs_.find(name); - PADDLE_ENFORCE_NE( - it, - attrs_.end(), - platform::errors::NotFound("Attribute %s is not found.", name)); - return static_cast(it->second.index() - 1); +proto::AttrType OpDesc::GetAttrType(const std::string &name, + bool with_attr_var) const { + auto attr = this->GetAttr(name, with_attr_var); + return static_cast(attr.index() - 1); } -std::vector OpDesc::AttrNames() const { +std::vector OpDesc::AttrNames(bool with_attr_var) const { std::vector retv; retv.reserve(attrs_.size()); for (auto &attr : attrs_) { + if (!with_attr_var && HasAttrVar(attr.second)) continue; retv.push_back(attr.first); } return retv; } +bool OpDesc::HasAttr(const std::string &name, bool with_attr_var) const { + auto iter = attrs_.find(name); + bool is_found = iter != attrs_.end(); + if (with_attr_var) { + return is_found; + } + return is_found && !HasAttrVar(iter->second); +} + void OpDesc::RemoveAttr(const 
std::string &name) { attrs_.erase(name); need_update_ = true; @@ -647,6 +683,16 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) { need_update_ = true; } +void OpDesc::SetVarAttr(const std::string &name, VarDesc *var) { + this->attrs_[name] = var; + need_update_ = true; +} + +void OpDesc::SetVarsAttr(const std::string &name, std::vector vars) { + this->attrs_[name] = vars; + need_update_ = true; +} + void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) { this->attrs_[name] = block; need_update_ = true; @@ -664,12 +710,18 @@ void OpDesc::SetAttrMap( need_update_ = true; } -Attribute OpDesc::GetAttr(const std::string &name) const { +Attribute OpDesc::GetAttr(const std::string &name, bool with_attr_var) const { auto it = attrs_.find(name); PADDLE_ENFORCE_NE( it, attrs_.end(), platform::errors::NotFound("Attribute %s is not found.", name)); + if (!with_attr_var) { + PADDLE_ENFORCE_EQ( + HasAttrVar(it->second), + false, + platform::errors::NotFound("Attribute %s is not found.", name)); + } return it->second; } @@ -790,6 +842,19 @@ struct SetAttrDescVisitor { void operator()(const std::vector &v) const { VectorToRepeated(v, attr_->mutable_bools()); } + + void operator()(const std::vector &v) const { + std::vector var_names; + for (auto var : v) { + var_names.emplace_back(var->Name()); + } + VectorToRepeated(var_names, attr_->mutable_vars_name()); + } + + void operator()(const VarDesc *desc) const { + attr_->set_var_name(desc->Name()); + } + void operator()(const std::vector &v) const { std::vector blocks_idx; for (auto blk : v) { @@ -866,12 +931,7 @@ void OpDesc::InferShape(const BlockDesc &block) { try { VLOG(3) << "CompileTime infer shape on " << Type(); auto &op_info = OpInfoMap::Instance().Get(this->Type()); - auto *checker = op_info.Checker(); - if (checker != nullptr) { - // set dafault value here - VLOG(10) << "begin to check attribute of " << Type(); - checker->Check(&attrs_); - } + this->CheckAttrs(); auto &infer_shape = op_info.infer_shape_; PADDLE_ENFORCE_EQ( static_cast(infer_shape), @@ -916,15 +976,62 @@ void OpDesc::InferVarType(BlockDesc *block) const { } } +void OpDesc::UpdateVarAttr(const std::string &name, const Attribute &attr) { + auto attr_type = static_cast(attr.index() - 1); + auto type = GetAttrType(name, true); + if (type == proto::AttrType::VAR) { + PADDLE_ENFORCE_EQ( + attr_type, + type, + platform::errors::InvalidArgument( + "Required attr.type == proto::AttrType::VAR, but received %s", + attr_type)); + auto *var_desc = PADDLE_GET_CONST(VarDesc *, attr); + VLOG(3) << "Update AttrVar " << name << " with " << var_desc->Name(); + attrs_[name] = FindVarRecursive(var_desc->Name()); + } else if (type == proto::AttrType::VARS) { + PADDLE_ENFORCE_EQ( + attr_type, + type, + platform::errors::InvalidArgument( + "Required attr.type == proto::AttrType::VARS, but received %s", + attr_type)); + auto vars_desc = PADDLE_GET_CONST(std::vector, attr); + std::vector new_val; + for (auto &var_desc : vars_desc) { + VLOG(3) << "Update AttrVars " << name << " with " << var_desc->Name(); + new_val.emplace_back(FindVarRecursive(var_desc->Name())); + } + attrs_[name] = std::move(new_val); + } +} + +VarDesc *OpDesc::FindVarRecursive(const std::string &name) { + auto *cur_block = block_; + while (cur_block != nullptr && cur_block->ID() >= 0) { + auto *var = block_->FindVar(name); + if (var != nullptr) { + return var; + } + cur_block = cur_block->ParentBlock(); + } + PADDLE_THROW(platform::errors::NotFound( + "Not found Var(%s) from Block(%d) back into global 
Block.", + name, + block_->ID())); +} + CompileTimeInferShapeContext::CompileTimeInferShapeContext( const OpDesc &op, const BlockDesc &block) : op_(op), block_(block) {} bool CompileTimeInferShapeContext::HasInput(const std::string &name) const { - if (op_.Inputs().find(name) == op_.Inputs().end()) { + auto inputs = op_.Inputs(/*with_attr_var=*/true); + if (inputs.find(name) == inputs.end()) { return false; } - const std::vector &input_names = op_.Input(name); + const std::vector &input_names = + op_.Input(name, /*with_attr_var=*/true); auto length = input_names.size(); if (length == 0) { return false; @@ -959,14 +1066,16 @@ bool CompileTimeInferShapeContext::HasOutput(const std::string &name) const { } bool CompileTimeInferShapeContext::HasAttr(const std::string &name) const { - return op_.HasAttr(name); + return op_.HasAttr(name, /*with_attr_var=*/false); } bool CompileTimeInferShapeContext::HasInputs(const std::string &name) const { - if (op_.Inputs().find(name) == op_.Inputs().end()) { + auto inputs = op_.Inputs(/*with_attr_var=*/true); + if (inputs.find(name) == inputs.end()) { return false; } - const std::vector &input_names = op_.Input(name); + const std::vector &input_names = + op_.Input(name, /*with_attr_var=*/true); if (input_names.empty()) { return false; } @@ -1004,7 +1113,7 @@ AttrReader CompileTimeInferShapeContext::Attrs() const { std::vector CompileTimeInferShapeContext::Inputs( const std::string &name) const { - return op_.Input(name); + return op_.Input(name, /*with_attr_var=*/true); } std::vector CompileTimeInferShapeContext::Outputs( @@ -1054,5 +1163,21 @@ proto::VarType::Type CompileTimeInferShapeContext::GetVarType( return block_.FindVarRecursive(name)->GetType(); } +std::vector AttrVarNames(const Attribute &attr) { + std::vector vars_name; + if (IsAttrVar(attr)) { + vars_name.emplace_back(PADDLE_GET_CONST(VarDesc *, attr)->Name()); + } else if (IsAttrVars(attr)) { + for (auto &iter : PADDLE_GET_CONST(std::vector, attr)) { + vars_name.emplace_back(iter->Name()); + } + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported Attribute value type `%s` for AttrVarNames", + platform::demangle(attr.type().name()))); + } + return vars_name; +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h index 02186a02e3d8358de2122af2742518b1d8a9ed92..a1e11cb8707099f1e8ff597da761889384c5784b 100644 --- a/paddle/fluid/framework/op_desc.h +++ b/paddle/fluid/framework/op_desc.h @@ -27,6 +27,7 @@ limitations under the License. 
*/ namespace paddle { namespace framework { +class VarDesc; class BlockDesc; class ProgramDesc; @@ -55,7 +56,10 @@ class OpDesc { const std::vector &Input(const std::string &name) const; - std::vector InputArgumentNames() const; + std::vector Input(const std::string &name, + bool with_attr_var) const; + + std::vector InputArgumentNames(bool with_attr_var = false) const; void SetInput(const std::string ¶m_name, const std::vector &args); @@ -72,24 +76,27 @@ class OpDesc { void RemoveInput(const std::string &name); - bool HasAttr(const std::string &name) const { - return attrs_.find(name) != attrs_.end(); - } + bool HasAttr(const std::string &name, bool with_attr_var = false) const; bool HasProtoAttr(const std::string &name) const; - proto::AttrType GetAttrType(const std::string &name) const; + proto::AttrType GetAttrType(const std::string &name, + bool with_attr_var = false) const; - std::vector AttrNames() const; + std::vector AttrNames(bool with_attr_var = false) const; void SetAttr(const std::string &name, const Attribute &v); void RemoveAttr(const std::string &name); + void SetVarAttr(const std::string &name, VarDesc *var); + + void SetVarsAttr(const std::string &name, std::vector vars); + void SetBlockAttr(const std::string &name, BlockDesc *block); void SetBlocksAttr(const std::string &name, std::vector blocks); - Attribute GetAttr(const std::string &name) const; + Attribute GetAttr(const std::string &name, bool with_attr_var = false) const; template T GetAttrIfExists(const std::string &name) const { @@ -120,11 +127,15 @@ class OpDesc { // Only be used in C++ void SetAttrMap(const AttributeMap &attr_map); - std::vector InputNames() const { return MapKeys(inputs_); } + std::vector InputNames(bool with_attr_var = false) const { + return MapKeys(inputs_); + } std::vector OutputNames() const { return MapKeys(outputs_); } const VariableNameMap &Inputs() const { return inputs_; } + VariableNameMap Inputs(bool with_attr_var) const; + const VariableNameMap &Outputs() const { return outputs_; } VariableNameMap *MutableInputs() { @@ -156,12 +167,18 @@ class OpDesc { const BlockDesc *Block() const { return this->block_; } + void UpdateVarAttr(const std::string &name, const Attribute &attr); + // The Id() and OrignalId() are only used for auto parallel. uint64_t Id() const { return id_; } uint64_t OriginalId() const { return original_id_; } void SetOriginalId(uint64_t original_id) { original_id_ = original_id; } private: + friend class ProgramDesc; + // Find VarDesc from OpDesc located Block into global Block + VarDesc *FindVarRecursive(const std::string &name); + template static std::vector MapKeys(const MapType &map) { std::vector ret_val; @@ -188,6 +205,7 @@ class OpDesc { VariableNameMap inputs_; // output arg name => output variable names VariableNameMap outputs_; + // attribute name => all original attrs AttributeMap attrs_; // need_update_ indicate there some local changes not be synchronized. If @@ -202,5 +220,7 @@ class OpDesc { // current OpDesc is not built from the other one. 
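Editorial note: the with_attr_var flag added to HasAttr, GetAttrType, AttrNames and GetAttr keeps existing callers oblivious to Variable-typed attributes (they stay hidden by default) while letting new code opt in. A sketch of how the corresponding pybind bindings behave, assuming the patched build:

```python
import paddle
from paddle.fluid import core

paddle.enable_static()

prog = paddle.static.Program()
with paddle.static.program_guard(prog):
    x = paddle.randn([8, 8])
    p = paddle.assign([0.5])
    paddle.nn.functional.dropout(x, p=p)

desc = [op for op in prog.global_block().ops if op.type == 'dropout'][0].desc
print(desc.has_attr('dropout_prob'))            # False: hidden by default
print(desc.has_attr('dropout_prob', True))      # True
print(desc.attr_type('dropout_prob', True) == core.AttrType.VAR)  # True
print(desc.attr('dropout_prob', True).name() == p.name)           # True
print('dropout_prob' in desc.attr_names())      # False
print('dropout_prob' in desc.attr_names(True))  # True
```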
uint64_t original_id_ = id_; }; + +std::vector AttrVarNames(const Attribute &attr); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index a2bade9809b21c1843d8243e13aa1ffb4d72977d..b4ef3efb8216c740cbec9141d099e66bb4838c32 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -447,6 +447,13 @@ OperatorBase::OperatorBase(const std::string& type, GenerateTemporaryNames(); CheckAllInputOutputSet(); } + // In OperatorBase level, all attribute with VarDesc type will be considered + // as Input. + for (auto& attr : FilterAttrVar(attrs)) { + VLOG(3) << "found Attribute with Variable type: " << attr.first; + inputs_[attr.first] = std::move(AttrVarNames(attr.second)); + attrs_.erase(attr.first); + } } std::vector OperatorBase::InputVars() const { @@ -2725,6 +2732,8 @@ void OperatorWithKernel::BuildPhiKernelContext( for (size_t i = 0; i < attr_names.size(); ++i) { VLOG(6) << "BuildPhiKernelContext: " << attr_names[i] << ": " << attr_defs[i].type_index; + // attribute with Variable type has been placed into Inputs(), and + // we can parse them from RuntimeContext.inputs. auto attr_iter = Attrs().find(attr_names[i]); switch (attr_defs[i].type_index) { case phi::AttributeType::SCALAR: diff --git a/paddle/fluid/framework/program_desc.cc b/paddle/fluid/framework/program_desc.cc index b184bc8be368138a487cd2192c9354878ea63104..e35df9f59dbb6f1ea2d6950a5c0035b3824f55f8 100644 --- a/paddle/fluid/framework/program_desc.cc +++ b/paddle/fluid/framework/program_desc.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/program_desc.h" +#include #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/version.h" @@ -97,6 +98,23 @@ ProgramDesc::ProgramDesc(const ProgramDesc &o) { block_descs.push_back(MutableBlock(block_id)); } op->SetBlocksAttr(attr_name, block_descs); + } else if (op->GetAttrType(attr_name, true) == proto::AttrType::VAR) { + VarDesc *var_desc = + PADDLE_GET_CONST(VarDesc *, op->GetAttr(attr_name, true)); + op->SetVarAttr(attr_name, + o.Block(block_id).FindVarRecursive(var_desc->Name())); + } else if (op->GetAttrType(attr_name, true) == proto::AttrType::VARS) { + std::vector vars_desc = PADDLE_GET_CONST( + std::vector, op->GetAttr(attr_name, true)); + std::vector new_vars_desc; + std::transform( + vars_desc.begin(), + vars_desc.end(), + std::back_inserter(new_vars_desc), + [&](VarDesc *var_desc) { + return o.Block(block_id).FindVarRecursive(var_desc->Name()); + }); + op->SetVarsAttr(attr_name, new_vars_desc); } } } @@ -129,7 +147,21 @@ void ProgramDesc::InitFromProto() { for (auto &block : blocks_) { for (auto *op : block->AllOps()) { for (const auto &attr : op->Proto()->attrs()) { - if (attr.type() == proto::AttrType::BLOCK) { + if (attr.type() == proto::AttrType::VAR) { + std::string var_name = attr.var_name(); + VLOG(3) << "InitFromProto: SetVarAttr " << attr.name() << " from " + << var_name; + op->SetVarAttr(attr.name(), op->FindVarRecursive(var_name)); + } else if (attr.type() == proto::AttrType::VARS) { + auto vars_name = attr.vars_name(); + std::vector vars_desc; + for (auto &var_name : vars_name) { + VLOG(3) << "InitFromProto: SetVarsAttr " << attr.name() << " from " + << var_name; + vars_desc.emplace_back(op->FindVarRecursive(var_name)); + } + op->SetVarsAttr(attr.name(), vars_desc); + } else if (attr.type() == proto::AttrType::BLOCK) { size_t blk_idx = attr.block_idx(); op->SetBlockAttr(attr.name(), 
this->MutableBlock(blk_idx)); } else if (attr.type() == proto::AttrType::BLOCKS) { diff --git a/paddle/fluid/framework/prune.cc b/paddle/fluid/framework/prune.cc index ede6a99c43678c3ee8fa4697cf388da7ee7c4c94..ceb45a83711ea941fe1a44917b798018fb88a898 100644 --- a/paddle/fluid/framework/prune.cc +++ b/paddle/fluid/framework/prune.cc @@ -180,6 +180,9 @@ void prune_impl(const proto::ProgramDesc& input, std::map* pruned_origin_block_id_map) { auto& block = input.blocks(block_id); auto& ops = block.ops(); + auto add_dependent_var = [&](const std::string& name) { + if (feed_var_names.count(name) == 0) dependent_vars->insert(name); + }; bool expect_feed = true; for (auto& op_desc : ops) { @@ -245,8 +248,17 @@ void prune_impl(const proto::ProgramDesc& input, // For eval / infer mode, there is no optimize op in program. for (auto& var : op_desc.inputs()) { for (auto& argu : var.arguments()) { - if (feed_var_names.count(argu) == 0) { - dependent_vars->insert(argu); + add_dependent_var(argu); + } + } + // NOTE(dev): All attibute with VarDesc type is considered as Input, + // so they shall be added into dependent_vars. + for (auto& attr : op_desc.attrs()) { + if (attr.type() == proto::AttrType::VAR) { + add_dependent_var(attr.var_name()); + } else if (attr.type() == proto::AttrType::VARS) { + for (auto& name : attr.vars_name()) { + add_dependent_var(name); } } } @@ -331,20 +343,30 @@ void prune_impl(const proto::ProgramDesc& input, } std::set var_names; + auto add_var_names = [&](const std::string& name) { + if (var_map.count(name) != 0) var_names.insert(name); + }; for (const auto& op : *op_field) { auto& input_field = op.inputs(); for (auto& input_var : input_field) { for (auto& arg : input_var.arguments()) { - if (var_map.count(arg) != 0) { - var_names.insert(arg); - } + add_var_names(arg); } } auto& output_field = op.outputs(); for (auto& output_var : output_field) { for (auto& arg : output_var.arguments()) { - if (var_map.count(arg) != 0) { - var_names.insert(arg); + add_var_names(arg); + } + } + // NOTE(dev): All attibute with VarDesc type is considered as Input, + // so they shall be added into dependent_vars. 
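Editorial note: InitFromProto() re-binds VAR/VARS attributes from the stored var_name / vars_name strings, and prune_impl now counts them as dependencies, so exported inference programs keep the ops that produce those Variables (the full save/load round trip is covered by test_attribute_var.py below). A sketch of the round trip through the serialized proto, assuming the patched build:

```python
import paddle
from paddle.fluid.framework import Program

paddle.enable_static()

prog = Program()
with paddle.static.program_guard(prog):
    x = paddle.randn([8, 8])
    out = paddle.nn.functional.dropout(x, p=paddle.assign([0.5]))

# Round trip through the serialized proto: InitFromProto() re-binds the
# VAR attribute from the var_name field written by SetAttrDescVisitor.
restored = Program.parse_from_string(prog.desc.serialize_to_string())
print("Var[" in str(restored))  # True
```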
+ for (auto& attr : op.attrs()) { + if (attr.type() == proto::AttrType::VAR) { + add_var_names(attr.var_name()); + } else if (attr.type() == proto::AttrType::VARS) { + for (auto& name : attr.vars_name()) { + add_var_names(name); } } } diff --git a/paddle/fluid/framework/type_defs.h b/paddle/fluid/framework/type_defs.h index 3bcad63f21a841d4be1eeca5360485a6760eb260..31a006914aca7cdeb98e110dada80e4a6a81e180 100644 --- a/paddle/fluid/framework/type_defs.h +++ b/paddle/fluid/framework/type_defs.h @@ -34,6 +34,7 @@ class OperatorBase; class OpDesc; class InferShapeContext; class InferVarTypeContext; +class VarDesc; class BlockDesc; class Variable; class InferNoNeedBufferVarsFN; @@ -55,7 +56,9 @@ using Attribute = paddle::variant, std::vector, - std::vector>; + std::vector, + VarDesc*, + std::vector>; using AttributeMap = std::unordered_map; #ifdef PADDLE_WITH_ASCEND_CL @@ -73,6 +76,8 @@ using NPUAttribute = paddle::variant, std::vector, std::vector, + VarDesc*, + std::vector, std::vector>>; using NPUAttributeMap = std::unordered_map; diff --git a/paddle/fluid/operators/tile_op.cc b/paddle/fluid/operators/tile_op.cc index 1215bf2de3c67ca83adfaa878d7ddb0560691fb5..1d5b57a8a3d53bf50ca74d86971c8387e39af982 100644 --- a/paddle/fluid/operators/tile_op.cc +++ b/paddle/fluid/operators/tile_op.cc @@ -114,42 +114,6 @@ class TileGradOp : public framework::OperatorWithKernel { "TileGrad"); auto x_dims = ctx->GetInputDim("X"); - - std::vector repeat_times = - ctx->Attrs().Get>("repeat_times"); - if (repeat_times.size() == 0) { - repeat_times = std::vector(x_dims.size(), -1); - } - - auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); - auto x_dim_vec = phi::vectorize(x_dims); - if (x_dim_vec.size() > repeat_times.size()) { - auto diff = x_dim_vec.size() - repeat_times.size(); - repeat_times.insert(repeat_times.begin(), diff, -1); - } else { - auto diff = repeat_times.size() - x_dim_vec.size(); - x_dim_vec.insert(x_dim_vec.begin(), diff, -1); - } - - for (size_t i = 0; i < repeat_times.size(); ++i) { - if (repeat_times[i] == -1 || x_dim_vec[i] == -1) { - continue; - } else { - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - x_dim_vec[i] * repeat_times[i], - out_dims[i], - platform::errors::InvalidArgument( - "The size (%d) of the dimension %d of Input(Out@GRAD) should " - "be equal to the multiplication of the crroresponding " - "dimension size of Input(X) (%d) and repeat_times (%d).", - out_dims[i], - i, - x_dim_vec[i], - repeat_times[i])); - } - } - } auto x_grad_name = framework::GradVarName("X"); if (ctx->HasOutput(x_grad_name)) { diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index 4cdf135b8ed59de52a22d2c3e2be934437da44f0..a670813a245a73768745cc5448b727bb26e48b2f 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -289,7 +289,9 @@ void BindOpDesc(pybind11::module *m) { .value("BOOL", pd::proto::AttrType::BOOLEAN) .value("BOOLS", pd::proto::AttrType::BOOLEANS) .value("BLOCK", pd::proto::AttrType::BLOCK) - .value("BLOCKS", pd::proto::AttrType::BLOCKS); + .value("BLOCKS", pd::proto::AttrType::BLOCKS) + .value("VAR", pd::proto::AttrType::VAR) + .value("VARS", pd::proto::AttrType::VARS); pybind11::class_ op_desc(*m, "OpDesc", ""); op_desc @@ -300,8 +302,16 @@ void BindOpDesc(pybind11::module *m) { .def("copy_from", &pd::OpDesc::CopyFrom) .def("type", &pd::OpDesc::Type) .def("set_type", &pd::OpDesc::SetType) - .def("input", &pd::OpDesc::Input) - .def("input_names", &pd::OpDesc::InputNames) + .def("input", + [](pd::OpDesc &self, 
const std::string &name) { + return self.Input(name); + }) + .def( + "input_names", + [](pd::OpDesc &self, bool with_attr_var) { + return self.InputNames(with_attr_var); + }, + py::arg("with_attr_var") = false) .def("output", &pd::OpDesc::Output) .def("output_names", &pd::OpDesc::OutputNames) .def("set_input", @@ -318,16 +328,46 @@ void BindOpDesc(pybind11::module *m) { }) .def("remove_output", &pd::OpDesc::RemoveOutput) .def("remove_input", &pd::OpDesc::RemoveInput) - .def("input_arg_names", &pd::OpDesc::InputArgumentNames) + .def( + "input_arg_names", + [](pd::OpDesc &self, bool with_attr_var) { + return self.InputArgumentNames(with_attr_var); + }, + py::arg("with_attr_var") = false) .def("output_arg_names", &pd::OpDesc::OutputArgumentNames) .def("_rename_input", &pd::OpDesc::RenameInput) .def("_rename_output", &pd::OpDesc::RenameOutput) - .def("has_attr", &pd::OpDesc::HasAttr) - .def("attr_type", &pd::OpDesc::GetAttrType) - .def("attr_names", &pd::OpDesc::AttrNames) + .def( + "has_attr", + [](pd::OpDesc &self, const std::string &name, bool with_attr_var) { + return self.HasAttr(name, with_attr_var); + }, + py::arg("name"), + py::arg("with_attr_var") = false) + .def( + "attr_type", + [](pd::OpDesc &self, const std::string &name, bool with_attr_var) { + return self.GetAttrType(name, with_attr_var); + }, + py::arg("name"), + py::arg("with_attr_var") = false) + .def( + "attr_names", + [](pd::OpDesc &self, bool with_attr_var) { + return self.AttrNames(with_attr_var); + }, + py::arg("with_attr_var") = false) .def("_set_attr", &pd::OpDesc::SetAttr) .def("remove_attr", &pd::OpDesc::RemoveAttr) - .def("attr", &pd::OpDesc::GetAttr) + .def( + "attr", + [](pd::OpDesc &self, const std::string &name, bool with_attr_var) { + return self.GetAttr(name, with_attr_var); + }, + py::arg("name"), + py::arg("with_attr_var") = false) + .def("set_var_attr", &pd::OpDesc::SetVarAttr) + .def("set_vars_attr", &pd::OpDesc::SetVarsAttr) .def("set_block_attr", &pd::OpDesc::SetBlockAttr) .def("set_blocks_attr", &pd::OpDesc::SetBlocksAttr) .def("set_serialized_attr", @@ -351,7 +391,7 @@ void BindOpDesc(pybind11::module *m) { .def("id", &pd::OpDesc::Id) .def("original_id", &pd::OpDesc::OriginalId) .def("set_original_id", &pd::OpDesc::SetOriginalId) - .def("inputs", &pd::OpDesc::Inputs) + .def("inputs", [](pd::OpDesc &self) { return self.Inputs(); }) .def("outputs", &pd::OpDesc::Outputs); } diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml index e90250901dc8d34b390c481fda0c5fe61bc5bdef..b67498bcc1a20c4cf914a1d4ca06d0a7bf500e78 100755 --- a/paddle/phi/api/yaml/legacy_api.yaml +++ b/paddle/phi/api/yaml/legacy_api.yaml @@ -667,7 +667,7 @@ backward : divide_grad - api : dropout - args : (Tensor x, Tensor seed_tensor, float p, bool is_test, str mode, int seed, bool fix_seed) + args : (Tensor x, Tensor seed_tensor, Scalar p, bool is_test, str mode, int seed, bool fix_seed) output : Tensor(out), Tensor(mask) infer_meta : func : DropoutInferMeta diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index b08a42ea3fde3f73ef423907ba1ea470ac1d6c4d..fdf2321ea38e12e27ef07ca4e873da7ed378d173 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -644,8 +644,8 @@ backward : divide_double_grad - backward_api : dropout_grad - forward : dropout (Tensor x, Tensor seed_tensor, float p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(mask) - args : (Tensor mask, Tensor out_grad, float p, bool 
is_test, str mode) + forward : dropout (Tensor x, Tensor seed_tensor, Scalar p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(mask) + args : (Tensor mask, Tensor out_grad, Scalar p, bool is_test, str mode) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta diff --git a/paddle/phi/core/enforce.cc b/paddle/phi/core/enforce.cc index 8074fbeb4918043e121503626eb4120cb13c7c0c..4eb580955a97c9f787159637f522c550eeb4b35c 100644 --- a/paddle/phi/core/enforce.cc +++ b/paddle/phi/core/enforce.cc @@ -27,6 +27,7 @@ class EagerVariable; } namespace paddle { namespace framework { +class VarDesc; class BlockDesc; using Attribute = paddle::variant, std::vector, - std::vector>; + std::vector, + VarDesc*, + std::vector>; using AttributeMap = std::unordered_map; } // namespace framework namespace imperative { diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc index 3cb3e4d6888a4b1f49de67a9f97330dfc955f55e..44e53fc32ccffe7712848520d77e69e540bdb162 100644 --- a/paddle/phi/infermeta/binary.cc +++ b/paddle/phi/infermeta/binary.cc @@ -953,7 +953,7 @@ void DistInferMeta(const MetaTensor& x, void DropoutInferMeta(const MetaTensor& x, const MetaTensor& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -973,7 +973,7 @@ void DropoutInferMeta(const MetaTensor& x, void DropoutNdInferMeta(const MetaTensor& x, const MetaTensor& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, diff --git a/paddle/phi/infermeta/binary.h b/paddle/phi/infermeta/binary.h index c57a702daf8b35069f4a6fd9065a4fdb2b3fd586..7dcbe33e0a9337e886a10ac8b3ffc2c782c3a918 100644 --- a/paddle/phi/infermeta/binary.h +++ b/paddle/phi/infermeta/binary.h @@ -143,7 +143,7 @@ void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out); void DropoutInferMeta(const MetaTensor& x, const MetaTensor& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -153,7 +153,7 @@ void DropoutInferMeta(const MetaTensor& x, void DropoutNdInferMeta(const MetaTensor& x, const MetaTensor& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, diff --git a/paddle/phi/kernels/cpu/dropout_grad_kernel.cc b/paddle/phi/kernels/cpu/dropout_grad_kernel.cc index 42b2834aaffc9ff45230c8a8f4d96b1b6a8eedb6..445e92716a899d86b3147dd715036266444d87b3 100644 --- a/paddle/phi/kernels/cpu/dropout_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/dropout_grad_kernel.cc @@ -24,7 +24,7 @@ template void DropoutNdGradKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, const std::vector& axis, @@ -35,6 +35,7 @@ void DropoutNdGradKernel(const Context& dev_ctx, auto dX = EigenVector::Flatten(*grad_x); auto dY = EigenVector::Flatten(*grad_y); + float prob = p.to(); auto& place = *dev_ctx.eigen_device(); auto& dropout_implementation = mode; @@ -42,20 +43,20 @@ void DropoutNdGradKernel(const Context& dev_ctx, if (dropout_implementation == "upscale_in_train") { dX.device(place) = static_cast(1) * dY; } else { - dX.device(place) = dY * static_cast(1.0f - p); + dX.device(place) = dY * static_cast(1.0f - prob); } } else { std::vector out_dims = phi::vectorize(out_grad.dims()); auto M = EigenVector::Flatten(mask); if (dropout_implementation == "upscale_in_train") { - if (p == 1.0f) { + if (prob == 1.0f) { dX.device(place) = static_cast(0) * dY; } else { if (axis.empty()) { - 
dX.device(place) = dY * M.cast() / static_cast(1.0f - p); + dX.device(place) = dY * M.cast() / static_cast(1.0f - prob); } else { - dX.device(place) = - dY * M.broadcast(out_dims).cast() / static_cast(1.0f - p); + dX.device(place) = dY * M.broadcast(out_dims).cast() / + static_cast(1.0f - prob); } } } else { @@ -72,12 +73,12 @@ template void DropoutGradRawKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, DenseTensor* x_grad) { DropoutNdGradKernel( - dev_ctx, mask, out_grad, p, is_test, mode, {}, x_grad); + dev_ctx, mask, out_grad, p.to(), is_test, mode, {}, x_grad); } } // namespace phi diff --git a/paddle/phi/kernels/cpu/dropout_kernel.cc b/paddle/phi/kernels/cpu/dropout_kernel.cc index d3ca21cfe33b9d1006d86f93c67800c97ed393dd..41c33fcf5dd3ff86b610c074874a815ea4eaa381 100644 --- a/paddle/phi/kernels/cpu/dropout_kernel.cc +++ b/paddle/phi/kernels/cpu/dropout_kernel.cc @@ -25,7 +25,7 @@ namespace phi { template void ComputeDropoutInference(const Context& ctx, const DenseTensor& x, - float dropout_prob, + const Scalar& dropout_prob, bool upscale_in_train, DenseTensor* y) { if (upscale_in_train) { @@ -41,7 +41,7 @@ void ComputeDropoutInference(const Context& ctx, auto X = EigenMatrix::Reshape(x, 1); auto Y = EigenMatrix::Reshape(*y, 1); auto& place = *ctx.eigen_device(); - Y.device(place) = X * static_cast(1.0f - dropout_prob); + Y.device(place) = X * static_cast(1.0f - dropout_prob.to()); } } @@ -49,7 +49,7 @@ template void DropoutRawKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -59,7 +59,7 @@ void DropoutRawKernel(const Context& dev_ctx, auto* y = out; const auto* x_data = x.data(); T* y_data = dev_ctx.template Alloc(y); - float dropout_prob = p; + float dropout_prob = p.to(); auto& dropout_implementation = mode; bool upscale_in_train = (dropout_implementation == "upscale_in_train"); @@ -109,7 +109,7 @@ template void DropoutNdKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -120,7 +120,7 @@ void DropoutNdKernel(const Context& dev_ctx, auto* y = out; const auto* x_data = x.data(); T* y_data = dev_ctx.template Alloc(y); - float dropout_prob = p; + float dropout_prob = p.to(); auto& dropout_implementation = mode; bool upscale_in_train = (dropout_implementation == "upscale_in_train"); diff --git a/paddle/phi/kernels/dropout_grad_kernel.h b/paddle/phi/kernels/dropout_grad_kernel.h index d8d5363ad59b7298d7b4216204dc3c433152e34a..c61e4d0b8598dc2fc2c21edbea6d72c3f97ea2e7 100644 --- a/paddle/phi/kernels/dropout_grad_kernel.h +++ b/paddle/phi/kernels/dropout_grad_kernel.h @@ -23,7 +23,7 @@ template void DropoutGradRawKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, DenseTensor* x_grad); @@ -32,7 +32,7 @@ template void DropoutNdGradKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, const std::vector& axis, diff --git a/paddle/phi/kernels/dropout_kernel.h b/paddle/phi/kernels/dropout_kernel.h index cba8160058e9905fd759b8d83027edce97dc332b..ff718d641bedcedcee30dc1515476fe609d3d001 100644 --- a/paddle/phi/kernels/dropout_kernel.h +++ 
b/paddle/phi/kernels/dropout_kernel.h @@ -24,7 +24,7 @@ template void DropoutRawKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -36,7 +36,7 @@ template void DropoutNdKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, diff --git a/paddle/phi/kernels/gpu/dropout_grad_kernel.cu b/paddle/phi/kernels/gpu/dropout_grad_kernel.cu index 841d98fbc003e547f5a11057b70b7dd6be6bc019..4aa59cded8f379815b6eaed32b2e6de85a48bdc3 100644 --- a/paddle/phi/kernels/gpu/dropout_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/dropout_grad_kernel.cu @@ -24,29 +24,41 @@ template void DropoutGradRawKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, DenseTensor* x_grad) { bool upscale_in_train = (mode == "upscale_in_train"); x_grad->mutable_data(dev_ctx.GetPlace()); - paddle::operators::DropoutGradGPUKernelDriver( - dev_ctx, is_test, p, upscale_in_train, out_grad, mask, x_grad, false); + paddle::operators::DropoutGradGPUKernelDriver(dev_ctx, + is_test, + p.to(), + upscale_in_train, + out_grad, + mask, + x_grad, + false); } template void DropoutNdGradKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, const std::vector& axis, DenseTensor* x_grad) { bool upscale_in_train = (mode == "upscale_in_train"); dev_ctx.template Alloc(x_grad); - paddle::operators::DropoutGradGPUKernelDriver( - dev_ctx, is_test, p, upscale_in_train, out_grad, mask, x_grad, true); + paddle::operators::DropoutGradGPUKernelDriver(dev_ctx, + is_test, + p.to(), + upscale_in_train, + out_grad, + mask, + x_grad, + true); } } // namespace phi diff --git a/paddle/phi/kernels/gpu/dropout_kernel.cu b/paddle/phi/kernels/gpu/dropout_kernel.cu index f973bb8e15fc75b19e98d8a8116f380699119fe9..0f2a8d9c938484f8bd3e75de5907faadd8116eec 100644 --- a/paddle/phi/kernels/gpu/dropout_kernel.cu +++ b/paddle/phi/kernels/gpu/dropout_kernel.cu @@ -24,7 +24,7 @@ template void DropoutRawKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -36,7 +36,7 @@ void DropoutRawKernel(const Context& dev_ctx, mask->mutable_data(dev_ctx.GetPlace()); paddle::operators::DropoutFwGPUKernelDriver(dev_ctx, is_test, - p, + p.to(), upscale_in_train, fix_seed, seed, @@ -51,7 +51,7 @@ template void DropoutNdKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -64,7 +64,7 @@ void DropoutNdKernel(const Context& dev_ctx, dev_ctx.template Alloc(mask); paddle::operators::DropoutFwGPUKernelDriver(dev_ctx, is_test, - p, + p.to(), upscale_in_train, fix_seed, seed, diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py index fdbf0312db664e54dd4711df6f91a1796887aeff..55c6a3308b8c12b972d2987a29fc649b75e87bc9 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py @@ -18,7 +18,7 @@ import numpy as np from paddle import _C_ops from 
paddle.fluid import core from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle.fluid.framework import _non_static_mode, default_main_program +from paddle.fluid.framework import _non_static_mode, default_main_program, Variable from paddle.fluid.layer_helper import LayerHelper __all__ = [] @@ -187,11 +187,12 @@ def dropout(x, if rng_name is None: return paddle.nn.functional.dropout(x, p, axis, training, mode, name) + if not isinstance(p, (float, int, Variable)): + raise TypeError("p argument should be a number(int|float) or Variable") + # fast return for p == 0 - if p == 0: return x + if isinstance(p, (int, float)) and p == 0: return x - assert isinstance(p, (float, int)), \ - TypeError("p argument should be a number") assert 0 <= p <= 1, ValueError("p argument should between 0 and 1") assert mode in ('downscale_in_infer', 'upscale_in_train'), \ ValueError( @@ -211,6 +212,11 @@ def dropout(x, seed = determinate_seed(rng_name) + if isinstance(p, Variable) and not p.shape != [1]: + raise TypeError( + "Required p.shape == [1] if type(p) is Variable, but received p.shape = {}" + .format(p.shape)) + helper = LayerHelper('dropout', **locals()) check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'dropout') diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 4ce4801d32bd6f7eba256cc41e8a98594b5ae2b2..8db5a4353aeebe386e2520a1e6a7b773d3320d47 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -1262,6 +1262,17 @@ def _varbase_creator(type=core.VarDesc.VarType.LOD_TENSOR, True if persistable else False) +def _all_is_type(vals, expected_type): + """ + Return True if type of each element is expected_type. + + NOTE: BuiltIn all() will always return True if vals is empty. + """ + assert isinstance(vals, (list, tuple)) + if not vals: return False + return all(isinstance(v, expected_type) for v in vals) + + class VariableMetaClass(type): @classmethod @@ -2934,7 +2945,28 @@ class Operator(object): if skip_op_callstack and name == "op_callstack": continue - attr_type = self.desc.attr_type(name) + attr_type = self.desc.attr_type(name, True) + if attr_type == core.AttrType.VAR: + attr_var_name = self.desc.attr(name, True).name() + a = "{name} = Var['{value}']".format(name=name, + type=attr_type, + value=attr_var_name) + attrs_str += a + if i != len(attr_names) - 1: + attrs_str += ", " + continue + + if attr_type == core.AttrType.VARS: + attr_var_names = [ + "'%s'" % var.name() for var in self.desc.attr(name, True) + ] + a = "{name} = Vars[{value}]".format( + name=name, type=attr_type, value=','.join(attr_var_names)) + attrs_str += a + if i != len(attr_names) - 1: + attrs_str += ", " + continue + if attr_type == core.AttrType.BLOCK: a = "{name} = block[{value}]".format( name=name, type=attr_type, value=self._block_attr_id(name)) @@ -3128,10 +3160,13 @@ class Operator(object): Raises: ValueError: If the type of value doesn't match with desc.attr_type(name). 
""" - if isinstance(val, Block): + if isinstance(val, Variable): + self.desc.set_var_attr(name, val.desc) + elif isinstance(val, list) and _all_is_type(val, Variable): + self.desc.set_vars_attr(name, [v.desc for v in val]) + elif isinstance(val, Block): self.desc.set_block_attr(name, val.desc) - elif isinstance(val, list) and val and all( - isinstance(v, Block) for v in val): + elif isinstance(val, list) and val and _all_is_type(val, Block): self.desc.set_blocks_attr(name, [v.desc for v in val]) elif isinstance(val, core.BlockDesc) or \ isinstance(val, core.ProgramDesc): @@ -3141,7 +3176,7 @@ class Operator(object): @property def attr_names(self): - return self.desc.attr_names() + return self.desc.attr_names(True) def attr(self, name): """ @@ -4392,10 +4427,13 @@ class IrOpNode(IrNode): assert self.node.op() is not None, \ "The node operator description can not be None." desc = self.node.op() - if isinstance(val, Block): + if isinstance(val, Variable): + desc.set_var_attr(name, val.desc) + elif isinstance(val, list) and _all_is_type(val, Variable): + desc.set_vars_attr(name, [v.desc for v in val]) + elif isinstance(val, Block): desc.set_block_attr(name, val.desc) - elif isinstance(val, list) and val and \ - all(isinstance(v, Block) for v in val): + elif isinstance(val, list) and val and _all_is_type(val, Block): desc.set_blocks_attr(name, [v.desc for v in val]) elif isinstance(val, core.BlockDesc) or \ isinstance(val, core.ProgramDesc): @@ -4850,10 +4888,13 @@ class IrGraph(object): """ Update the value of desc's attribute by attribute's name. """ - if isinstance(val, Block): + if isinstance(val, Variable): + desc.set_var_attr(name, val.desc) + elif isinstance(val, list) and _all_is_type(val, Variable): + desc.set_vars_attr(name, [v.desc for v in val]) + elif isinstance(val, Block): desc.set_block_attr(name, val.desc) - elif isinstance(val, list) and val and all( - isinstance(v, Block) for v in val): + elif isinstance(val, list) and val and _all_is_type(val, Block): desc.set_blocks_attr(name, [v.desc for v in val]) elif isinstance(val, core.BlockDesc) or \ isinstance(val, core.ProgramDesc): diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 5b4fe640d01fc8080fd391be4f981f0a1a60b44b..3ce7c453613b54687380fb9148d3c809aa3f7c26 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1132,8 +1132,11 @@ def dropout(x, x = fluid.data(name="data", shape=[None, 32, 32], dtype="float32") dropped = fluid.layers.dropout(x, dropout_prob=0.5) """ + if not isinstance(dropout_prob, (float, int, Variable)): + raise TypeError( + "dropout_prob argument should be a number(int|float) or Variable") # fast return for p == 0 - if dropout_prob == 0: + if isinstance(dropout_prob, (int, float)) and dropout_prob == 0: return x if _non_static_mode(): @@ -1152,6 +1155,10 @@ def dropout(x, def get_attrs(prog, dropout_prob, is_test, seed): if (seed is None or seed == 0) and prog.random_seed != 0: seed = prog.random_seed + if isinstance(dropout_prob, Variable) and not dropout_prob.shape != [1]: + raise TypeError( + "Required dropout_prob.shape == [1] if type(dropout_prob) is Variable, but received dropout_prob.shape = {}" + .format(dropout_prob.shape)) attrs = { 'dropout_prob': dropout_prob, 'is_test': is_test, diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 6862ac1fd66346ffca7ad0189261b85c65300723..dbbc207fba4014d4088542883a07bd8070a92c6b 100644 --- a/python/paddle/fluid/layers/tensor.py +++ 
b/python/paddle/fluid/layers/tensor.py @@ -390,9 +390,7 @@ def concat(input, axis=0, name=None): attrs = {} if isinstance(axis, Variable): axis.stop_gradient = True - inputs['AxisTensor'] = axis - else: - attrs['axis'] = axis + attrs['axis'] = axis helper.append_op(type='concat', inputs=inputs, diff --git a/python/paddle/fluid/tests/unittests/test_attribute_var.py b/python/paddle/fluid/tests/unittests/test_attribute_var.py new file mode 100644 index 0000000000000000000000000000000000000000..a79a8d400a36339bd6c58279d5def541bea648dd --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_attribute_var.py @@ -0,0 +1,158 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest +import tempfile +import paddle +import paddle.inference as paddle_infer +from paddle.fluid.framework import program_guard, Program +import numpy as np + +paddle.enable_static() + + +class UnittestBase(unittest.TestCase): + + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + self.init_info() + + def tearDwon(self): + self.temp_dir.cleanup() + + def init_info(self): + self.shapes = None + self.save_path = None + + def infer_prog(self): + config = paddle_infer.Config(self.save_path + '.pdmodel', + self.save_path + '.pdiparams') + predictor = paddle_infer.create_predictor(config) + input_names = predictor.get_input_names() + for i, shape in enumerate(self.shapes): + input_handle = predictor.get_input_handle(input_names[i]) + fake_input = np.random.randn(*shape).astype("float32") + input_handle.reshape(shape) + input_handle.copy_from_cpu(fake_input) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output_data = output_handle.copy_to_cpu() + + return output_data + + +class TestDropout(UnittestBase): + + def init_info(self): + self.shapes = [[10, 10]] + self.save_path = os.path.join(self.temp_dir.name, 'dropout') + + def test_static(self): + main_prog = Program() + starup_prog = Program() + with program_guard(main_prog, starup_prog): + fc = paddle.nn.Linear(10, 10) + x = paddle.randn(self.shapes[0]) + x.stop_gradient = False + feat = fc(x) + # p is a Variable + p = paddle.randn([1]) + out = paddle.nn.functional.dropout(feat, p=p) + sgd = paddle.optimizer.SGD() + sgd.minimize(paddle.mean(out)) + # test _to_string + self.assertTrue("Var[" in str(main_prog)) + + exe = paddle.static.Executor() + exe.run(starup_prog) + res = exe.run(fetch_list=[x, out]) + # export model + paddle.static.save_inference_model(self.save_path, [x], [out], exe) + + # Test for Inference Predictor + infer_out = self.infer_prog() + self.assertEqual(infer_out.shape, (10, 10)) + + +class TestTileTensorList(UnittestBase): + + def init_info(self): + self.shapes = [[2, 3, 4]] + self.save_path = os.path.join(self.temp_dir.name, 'tile_tensors') + + def test_static(self): + main_prog = Program() + starup_prog = Program() + with program_guard(main_prog, starup_prog): + fc = 
paddle.nn.Linear(4, 10) + x = paddle.randn([2, 3, 4]) + x.stop_gradient = False + feat = fc(x) + shape0 = paddle.full([1], 1, dtype='int32') + shape1 = paddle.full([1], 2, dtype='int32') + shape = [3, shape1, shape0] + out = paddle.tile(feat, shape) + + sgd = paddle.optimizer.SGD() + sgd.minimize(paddle.mean(out)) + self.assertTrue("Vars[" in str(main_prog)) + + exe = paddle.static.Executor() + exe.run(starup_prog) + res = exe.run(fetch_list=[x, out]) + self.assertEqual(res[1].shape, (6, 6, 10)) + + paddle.static.save_inference_model(self.save_path, [x], [out], exe) + # Test for Inference Predictor + infer_out = self.infer_prog() + self.assertEqual(infer_out.shape, (6, 6, 10)) + + +class TestTileTensor(UnittestBase): + + def init_info(self): + self.shapes = [[2, 3, 4]] + self.save_path = os.path.join(self.temp_dir.name, 'tile_tensor') + + def test_static(self): + main_prog = Program() + starup_prog = Program() + with program_guard(main_prog, starup_prog): + fc = paddle.nn.Linear(4, 10) + x = paddle.randn([2, 3, 4]) + x.stop_gradient = False + feat = fc(x) + # shape is a Variable + shape = paddle.assign([3, 2, 1]) + out = paddle.tile(feat, shape) + + sgd = paddle.optimizer.SGD() + sgd.minimize(paddle.mean(out)) + self.assertTrue("Var[" in str(main_prog)) + + exe = paddle.static.Executor() + exe.run(starup_prog) + res = exe.run(fetch_list=[x, out]) + self.assertEqual(res[1].shape, (6, 6, 10)) + + paddle.static.save_inference_model(self.save_path, [x], [out], exe) + # Test for Inference Predictor + infer_out = self.infer_prog() + self.assertEqual(infer_out.shape, (6, 6, 10)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py index 33992b1881ec4363e8e9f9f056e37697b6d0a3cd..30c4201a0b943c0960254d343829e5ce9811a958 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py @@ -1103,6 +1103,47 @@ class TestDropoutBackward(unittest.TestCase): self.cal_grad_upscale_train(mask.numpy(), prob))) +class TestDropOutWithProbTensor(unittest.TestCase): + + def setUp(self): + shapes = [[10, 10], [10, 10, 10], [10, 10, 10, 10]] + self.inputs = [ + np.random.random(shape).astype("float32") for shape in shapes + ] + self.place = paddle.CUDAPlace( + 0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() + + def api_case(self, x): + p = paddle.assign([0.5]) + out = paddle.nn.functional.dropout(x=x, p=p, training=True) + return out + + def run_static(self, x): + paddle.seed(2022) + main_program = Program() + + with program_guard(main_program): + input = paddle.static.data(shape=x.shape, name='x', dtype='float32') + out = self.api_case(input) + + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'x': x}, fetch_list=[out]) + + return res[0] + + def run_dygraph(self, x): + paddle.seed(2022) + with fluid.dygraph.guard(self.place): + out = self.api_case(paddle.to_tensor(x)) + return out + + def test_p_tensor(self): + for x in self.inputs: + static_res = self.run_static(x) + dygraph_res = self.run_dygraph(x) + self.assertTrue(np.array_equal(static_res, dygraph_res)) + + class TestRandomValue(unittest.TestCase): def test_fixed_random_number(self): diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index ba75fb23b8b18b344c486e7f739f008a19c8a9e8..a7fad9a7c81e34a2acc92f265846d339d3e81105 100644 --- a/python/paddle/nn/functional/common.py +++ 
b/python/paddle/nn/functional/common.py @@ -1004,14 +1004,14 @@ def dropout(x, print(y_01) """ - # fast return for p == 0 - if p == 0: - return x - - if not isinstance(p, (float, int)): - raise TypeError("p argument should be a number") - if p < 0 or p > 1: - raise ValueError("p argument should between 0 and 1") + if not isinstance(p, (float, int, Variable)): + raise TypeError("p argument should be a number or Variable") + + if isinstance(p, (int, float)): + # fast return for p == 0 + if p == 0: return x + elif p < 0 or p > 1: + raise ValueError("p argument should between 0 and 1") if mode not in ('downscale_in_infer', 'upscale_in_train'): raise ValueError( "mode argument should be 'downscale_in_infer' or 'upscale_in_train'" @@ -1050,6 +1050,12 @@ def dropout(x, def get_attrs(prog, dropout_prob, is_test, seed): if (seed is None or seed == 0) and prog.random_seed != 0: seed = prog.random_seed + + if isinstance(dropout_prob, + Variable) and not dropout_prob.shape != [1]: + raise TypeError( + "Required p.shape == [1] if type(p) is Variable, but received p.shape = {}" + .format(p.shape)) attrs = { 'dropout_prob': dropout_prob, 'is_test': is_test, diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 0b1a22865d506ae37079d0ff469348a51322e8af..05dc8035c5d59e4090f082b4c4005f234d16cf21 100755 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -1117,9 +1117,7 @@ def concat(x, axis=0, name=None): attrs = {} if isinstance(axis, Variable): axis.stop_gradient = True - inputs['AxisTensor'] = axis - else: - attrs['axis'] = axis + attrs['axis'] = axis helper.append_op(type='concat', inputs=inputs, @@ -2937,13 +2935,11 @@ def tile(x, repeat_times, name=None): if isinstance(repeat_times, Variable): repeat_times.stop_gradient = True - inputs['RepeatTimes'] = repeat_times - attrs['repeat_times'] = [-1] + attrs['repeat_times'] = repeat_times elif isinstance(repeat_times, (list, tuple)): attrs['repeat_times'] = get_attr_repeat_times(repeat_times) if utils._contain_var(repeat_times): - inputs['repeat_times_tensor'] = utils._convert_to_tensor_list( - repeat_times) + attrs['repeat_times'] = utils._convert_to_tensor_list(repeat_times) dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype)
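Editorial note: with repeat_times now carried as a VAR/VARS attribute instead of the repeat_times_tensor input, paddle.tile accepts a list that mixes Python ints and 1-element Tensors in static graph mode. A sketch mirroring TestTileTensorList above, assuming the patched build:

```python
import paddle

paddle.enable_static()

prog = paddle.static.Program()
with paddle.static.program_guard(prog):
    x = paddle.randn([2, 3, 4])
    # Mixing Python ints and 1-element Tensors: the whole list is stored in
    # the repeat_times attribute as VARS instead of a repeat_times_tensor input.
    repeat_times = [3, paddle.full([1], 2, dtype='int32'), paddle.full([1], 1, dtype='int32')]
    out = paddle.tile(x, repeat_times)

exe = paddle.static.Executor()
res, = exe.run(prog, fetch_list=[out])
print(res.shape)  # (6, 6, 4)
```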
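Editorial note: the probability-as-Tensor call also works in dynamic graph mode, where p reaches the phi kernels as a Scalar (the p.to<float>() conversions in the dropout kernels above). A sketch mirroring TestDropOutWithProbTensor, assuming the patched build and the default (dynamic graph) mode:

```python
import paddle

# Dynamic graph (eager) mode: the Tensor p is converted to a Scalar attribute.
paddle.seed(2022)
x = paddle.rand([4, 4])
p = paddle.to_tensor([0.3])
y = paddle.nn.functional.dropout(x, p=p)
print(y.shape)  # [4, 4]
```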