diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 2e189e4865f4d820d09d5b135cadbb31f9500279..d4d5f4903f8639fcecd1a0c986e036c4eacd8aaf 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -438,6 +438,7 @@ if(WITH_XPU) SRCS operator.cc DEPS xpu_op_list op_info + proto_desc device_context tensor scope @@ -462,6 +463,7 @@ else() operator SRCS operator.cc DEPS op_info + proto_desc device_context tensor scope diff --git a/paddle/fluid/framework/attribute.cc b/paddle/fluid/framework/attribute.cc index 13f175ce0b1cd94bd550fdac16cec652a29c5e50..dd456b147ac8d67f262a8e9708023ada77aa4978 100644 --- a/paddle/fluid/framework/attribute.cc +++ b/paddle/fluid/framework/attribute.cc @@ -42,6 +42,10 @@ paddle::any GetAttrValue(const Attribute& attr) { return PADDLE_GET_CONST(std::vector, attr); case proto::AttrType::FLOAT64S: return PADDLE_GET_CONST(std::vector, attr); + case proto::AttrType::VAR: + return PADDLE_GET_CONST(VarDesc*, attr); + case proto::AttrType::VARS: + return PADDLE_GET_CONST(std::vector, attr); case proto::AttrType::BLOCK: return PADDLE_GET_CONST(BlockDesc*, attr); case proto::AttrType::BLOCKS: diff --git a/paddle/fluid/framework/attribute.h b/paddle/fluid/framework/attribute.h index a149c18f542e2542cb0535a132f3003881a7a0b6..4d3ba2a1820be55807d9d2122b9a75f8b828da21 100644 --- a/paddle/fluid/framework/attribute.h +++ b/paddle/fluid/framework/attribute.h @@ -221,6 +221,28 @@ inline proto::AttrType AttrTypeID(const Attribute& attr) { return static_cast(attr.index() - 1); } +inline bool IsAttrVar(const Attribute& attr) { + return AttrTypeID(attr) == proto::AttrType::VAR; +} + +inline bool IsAttrVars(const Attribute& attr) { + return AttrTypeID(attr) == proto::AttrType::VARS; +} + +inline bool HasAttrVar(const Attribute& attr) { + return IsAttrVar(attr) || IsAttrVars(attr); +} + +inline AttributeMap FilterAttrVar(const AttributeMap& attrs) { + AttributeMap attrs_var; + for (auto& attr : attrs) { + if (HasAttrVar(attr.second)) { + attrs_var.emplace(attr); + } + } + return attrs_var; +} + class AttrReader { public: explicit AttrReader(const AttributeMap& attrs) @@ -414,9 +436,15 @@ class TypedAttrChecker { } return; } + // If attribute is VarDesc(s), we should verify it's dtype and shape. 
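Editorial note: the helpers added above (IsAttrVar, IsAttrVars, HasAttrVar, FilterAttrVar) and the early return in TypedAttrChecker are what allow an operator attribute to be bound to a Variable whose value only exists at runtime; the compile-time checker skips such attributes instead of rejecting them. For orientation, this is the user-level behaviour the change enables, a minimal sketch mirroring the new test_attribute_var.py test near the end of this patch (requires a build that includes this change):

```python
import paddle

paddle.enable_static()

main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.randn([10, 10])
    # The dropout probability lives in a Variable, so it is recorded as a
    # VAR-typed attribute on the dropout op instead of a plain float.
    p = paddle.assign([0.5])
    out = paddle.nn.functional.dropout(x, p=p)

exe = paddle.static.Executor()
exe.run(startup_prog)
res, = exe.run(main_prog, fetch_list=[out])
print(res.shape)  # (10, 10)
```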
+ auto it = attr_map->find(attr_name_); + if (it != attr_map->end() && HasAttrVar(it->second)) { + VLOG(1) << "Found Attribute " << attr_name_ + << " with Variable, skip attr_checker."; + return; + } if (only_check_exist_value) { - auto it = attr_map->find(attr_name_); if (it != attr_map->end()) { ExtractAttribute extract_attr(attr_name_); T* attr_value = extract_attr(it->second); @@ -425,7 +453,6 @@ class TypedAttrChecker { } } } else { - auto it = attr_map->find(attr_name_); if (it == attr_map->end()) { // user do not set this attr PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/framework/attribute_test.cc b/paddle/fluid/framework/attribute_test.cc index 8a47e41d38359ca6b0de53db9629ec1b0b8e8ceb..ab6f71926b80f510fdefb16096b976a08d0252db 100644 --- a/paddle/fluid/framework/attribute_test.cc +++ b/paddle/fluid/framework/attribute_test.cc @@ -19,6 +19,7 @@ #include "gtest/gtest.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/var_desc.h" #include "paddle/utils/any.h" TEST(Attribute, GetAttrValueToAny) { @@ -72,6 +73,25 @@ TEST(Attribute, GetAttrValueToAny) { EXPECT_EQ(vec_bool[0], true); EXPECT_EQ(vec_bool[1], true); + paddle::framework::VarDesc var_desc("axis"); + paddle::framework::Attribute var_attr(&var_desc); + auto rlt_var_attr = paddle::framework::GetAttrValue(var_attr); + auto var_desc_ptr = + paddle::any_cast(rlt_var_attr); + EXPECT_NE(var_desc_ptr, nullptr); + EXPECT_EQ(var_desc_ptr->Name(), var_desc.Name()); + + paddle::framework::VarDesc var2_desc("prob"); + std::vector vars_desc{&var_desc, &var2_desc}; + paddle::framework::Attribute vars_attr(vars_desc); + + auto rlt_vars_attr = paddle::framework::GetAttrValue(vars_attr); + auto rlt_vars_desc = + paddle::any_cast>(rlt_vars_attr); + EXPECT_EQ(rlt_vars_desc.size(), vars_desc.size()); + EXPECT_EQ(rlt_vars_desc[0]->Name(), vars_desc[0]->Name()); + EXPECT_EQ(rlt_vars_desc[1]->Name(), vars_desc[1]->Name()); + paddle::framework::ProgramDesc prog; paddle::framework::proto::BlockDesc proto_block; paddle::framework::BlockDesc block_desc(&prog, &proto_block); diff --git a/paddle/fluid/framework/block_desc.cc b/paddle/fluid/framework/block_desc.cc index 84d52c996d0562e6feadfc573c61ec77cf2703f9..d9dd10f7165374dbe64c4eb66bd162c36a1ab99d 100644 --- a/paddle/fluid/framework/block_desc.cc +++ b/paddle/fluid/framework/block_desc.cc @@ -217,13 +217,15 @@ BlockDesc::BlockDesc(const BlockDesc &other, ProgramDesc *prog) : prog_(prog), desc_(desc) { need_update_ = true; - for (auto &op : other.ops_) { - ops_.emplace_back(new OpDesc(*op, this)); - } + // NOTE(dev): Init vars_ firstly so we can find them + // while constructing OpDesc. 
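Editorial note: initializing vars_ before ops_ in this copy constructor matters because each copied OpDesc re-binds its VAR/VARS attributes to the new block's own VarDescs (see OpDesc::UpdateVarAttr further down), which requires those variables to already exist. A sketch of the observable effect, assuming the patched build; Program.clone() goes through exactly this ProgramDesc/BlockDesc copy path:

```python
import paddle

paddle.enable_static()

prog = paddle.static.Program()
with paddle.static.program_guard(prog):
    x = paddle.randn([4, 4])
    out = paddle.nn.functional.dropout(x, p=paddle.assign([0.3]))

# clone() copies the underlying ProgramDesc/BlockDesc, so the VAR attribute
# is re-bound to a VarDesc owned by the cloned block rather than left dangling.
cloned = prog.clone()
print("Var[" in str(cloned))  # True
```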
for (auto &it : other.vars_) { auto *var = new VarDesc(*it.second); vars_[it.first].reset(var); } + for (auto &op : other.ops_) { + ops_.emplace_back(new OpDesc(*op, this)); + } } void BlockDesc::SetForwardBlockID(int32_t forward_block_id) { @@ -273,7 +275,10 @@ void BlockDesc::MoveFrom(BlockDesc *block) { const auto &attr_name = pair.first; const auto &attr_value = pair.second; auto attr_type = static_cast(attr_value.index() - 1); - if (attr_type == proto::AttrType::BLOCK) { + if (attr_type == proto::AttrType::VAR || + attr_type == proto::AttrType::VARS) { + dst_op->UpdateVarAttr(attr_name, attr_value); + } else if (attr_type == proto::AttrType::BLOCK) { auto block_id = PADDLE_GET_CONST(BlockDesc *, attr_value)->ID(); dst_op->SetBlockAttr(attr_name, prog_->MutableBlock(block_id)); VLOG(10) << "Set block attr " << attr_name << " id " << block_id; diff --git a/paddle/fluid/framework/framework.proto b/paddle/fluid/framework/framework.proto index 0d3e7c2741c17b2649c7e73a3c97d3f117c6027f..391197d967abe29784eeccd162baa4003b47b2ae 100644 --- a/paddle/fluid/framework/framework.proto +++ b/paddle/fluid/framework/framework.proto @@ -36,6 +36,8 @@ enum AttrType { BLOCKS = 10; LONGS = 11; FLOAT64S = 12; + VAR = 13; + VARS = 14; } message ProcessMeshDesc { @@ -65,6 +67,8 @@ message OpDesc { repeated int32 blocks_idx = 14; repeated int64 longs = 15; repeated double float64s = 16; + optional string var_name = 17; + repeated string vars_name = 18; }; message Var { diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc index c525888ca116c9cd164123d758395eb3c2632c7a..eb988d59a2a8b51b1480184c8990e18f1ff52967 100644 --- a/paddle/fluid/framework/infershape_utils.cc +++ b/paddle/fluid/framework/infershape_utils.cc @@ -451,12 +451,13 @@ CompatInferMetaContext BuildInferMetaContext(InferShapeContext* ctx, auto attr_reader = ctx->Attrs(); for (size_t i = 0; i < attr_names.size(); ++i) { auto& attr_name = attr_names[i]; - VLOG(6) << "BuildInferMetaContext: " << attr_name << ": " - << attr_defs[i].type_index; auto* attr_ptr = attr_reader.GetAttr(attr_name); + bool is_attr_var = attr_ptr != nullptr && HasAttrVar(*attr_ptr); + VLOG(6) << "BuildInferMetaContext: " << attr_name << ": " + << attr_defs[i].type_index << ", is_attr_var: " << is_attr_var; switch (attr_defs[i].type_index) { case phi::AttributeType::SCALAR: - if (attr_ptr) { + if (attr_ptr && !is_attr_var) { auto& attr = *attr_ptr; switch (AttrTypeID(attr)) { case framework::proto::AttrType::FLOAT: @@ -502,7 +503,7 @@ CompatInferMetaContext BuildInferMetaContext(InferShapeContext* ctx, break; case phi::AttributeType::INT_ARRAY: // When attr is a vector_tensor or tensor, transform it to IntArray - if (attr_ptr) { + if (attr_ptr && !is_attr_var) { auto& attr = *attr_ptr; switch (AttrTypeID(attr)) { case framework::proto::AttrType::INTS: diff --git a/paddle/fluid/framework/ir/graph.cc b/paddle/fluid/framework/ir/graph.cc index fceed0fc44e5f6baa274254ad4307b78cac0fb12..6946fb6d7d9eee9ed63f55682ddb1e1bc80283b9 100644 --- a/paddle/fluid/framework/ir/graph.cc +++ b/paddle/fluid/framework/ir/graph.cc @@ -149,7 +149,7 @@ std::map> Graph::InitFromBlock( ++desc_order; // For input args, reuse the same var name if it was created before. // Otherwise, create a new one. 
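Editorial note: treating VAR/VARS attributes as inputs when building the IR graph keeps dependency edges correct, and the new proto fields (var_name / vars_name) give them a serialized form. From Python this surfaces through the with_attr_var-aware input queries and the op's string form, using the pybind bindings added later in this diff. A sketch, assuming the patched build; the dropout_prob attribute name comes from the legacy dropout op:

```python
import paddle

paddle.enable_static()

prog = paddle.static.Program()
with paddle.static.program_guard(prog):
    x = paddle.randn([8, 8])
    p = paddle.assign([0.5])
    out = paddle.nn.functional.dropout(x, p=p)

drop_op = [op for op in prog.global_block().ops if op.type == 'dropout'][0]
# With with_attr_var=True the probability Variable is reported next to the
# regular inputs; without it the op looks exactly as it did before this patch.
print(p.name in drop_op.desc.input_arg_names(True))   # True
print(p.name in drop_op.desc.input_arg_names())       # False
# Operator._to_string() renders the attribute as Var['<name>'].
print("Var['%s']" % p.name in str(prog))              # True
```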
- for (auto &each_var_name : op->InputArgumentNames()) { + for (auto &each_var_name : op->InputArgumentNames(true)) { not_visited_vars.erase(each_var_name); ir::Node *var = nullptr; if (var_nodes.find(each_var_name) != var_nodes.end()) { diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 52ac86d060694076f1906d5d78e7cd23bcde8cae..cc7923602647fc527c9fd5fa25333b544dd2d931 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -425,6 +425,9 @@ OpDesc::OpDesc(const OpDesc &other, BlockDesc *block) { CopyFrom(other); block_ = block; need_update_ = true; + for (auto &iter : attrs_) { + UpdateVarAttr(iter.first, iter.second); + } } void OpDesc::CopyFrom(const OpDesc &op_desc) { @@ -465,9 +468,13 @@ OpDesc::OpDesc(const proto::OpDesc &desc, BlockDesc *block) for (const proto::OpDesc::Attr &attr : desc_.attrs()) { std::string attr_name = attr.name(); // The sub_block referred to by the BLOCK attr hasn't been added - // to ProgramDesc class yet, we skip setting BLOCK/BLOCKS attr here. - if (attr.type() != proto::AttrType::BLOCK && - attr.type() != proto::AttrType::BLOCKS) { + // to ProgramDesc class yet, we skip setting BLOCK/BLOCKS/VAR/VARS attr + // here. + auto attr_type = attr.type(); + if (attr_type != proto::AttrType::BLOCK && + attr_type != proto::AttrType::BLOCKS && + attr_type != proto::AttrType::VAR && + attr_type != proto::AttrType::VARS) { attrs_[attr_name] = GetAttrValue(attr); } } @@ -489,9 +496,31 @@ const std::vector &OpDesc::Input(const std::string &name) const { return it->second; } -std::vector OpDesc::InputArgumentNames() const { +std::vector OpDesc::Input(const std::string &name, + bool with_attr_var) const { + // Attribute with VarDesc type will consider as Input + if (with_attr_var) { + auto it = attrs_.find(name); + if (it != attrs_.end() && HasAttrVar(it->second)) + return AttrVarNames(it->second); + } + return this->Input(name); +} + +VariableNameMap OpDesc::Inputs(bool with_attr_var) const { + if (!with_attr_var) { + return inputs_; + } + VariableNameMap res = inputs_; + for (auto &attr : FilterAttrVar(attrs_)) { + res[attr.first] = AttrVarNames(attr.second); + } + return res; +} + +std::vector OpDesc::InputArgumentNames(bool with_attr_var) const { std::vector retv; - for (auto &ipt : this->inputs_) { + for (auto &ipt : this->Inputs(with_attr_var)) { retv.insert(retv.end(), ipt.second.begin(), ipt.second.end()); } return retv; @@ -558,24 +587,31 @@ bool OpDesc::HasProtoAttr(const std::string &name) const { return false; } -proto::AttrType OpDesc::GetAttrType(const std::string &name) const { - auto it = attrs_.find(name); - PADDLE_ENFORCE_NE( - it, - attrs_.end(), - platform::errors::NotFound("Attribute %s is not found.", name)); - return static_cast(it->second.index() - 1); +proto::AttrType OpDesc::GetAttrType(const std::string &name, + bool with_attr_var) const { + auto attr = this->GetAttr(name, with_attr_var); + return static_cast(attr.index() - 1); } -std::vector OpDesc::AttrNames() const { +std::vector OpDesc::AttrNames(bool with_attr_var) const { std::vector retv; retv.reserve(attrs_.size()); for (auto &attr : attrs_) { + if (!with_attr_var && HasAttrVar(attr.second)) continue; retv.push_back(attr.first); } return retv; } +bool OpDesc::HasAttr(const std::string &name, bool with_attr_var) const { + auto iter = attrs_.find(name); + bool is_found = iter != attrs_.end(); + if (with_attr_var) { + return is_found; + } + return is_found && !HasAttrVar(iter->second); +} + void OpDesc::RemoveAttr(const 
std::string &name) { attrs_.erase(name); need_update_ = true; @@ -647,6 +683,16 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) { need_update_ = true; } +void OpDesc::SetVarAttr(const std::string &name, VarDesc *var) { + this->attrs_[name] = var; + need_update_ = true; +} + +void OpDesc::SetVarsAttr(const std::string &name, std::vector vars) { + this->attrs_[name] = vars; + need_update_ = true; +} + void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) { this->attrs_[name] = block; need_update_ = true; @@ -664,12 +710,18 @@ void OpDesc::SetAttrMap( need_update_ = true; } -Attribute OpDesc::GetAttr(const std::string &name) const { +Attribute OpDesc::GetAttr(const std::string &name, bool with_attr_var) const { auto it = attrs_.find(name); PADDLE_ENFORCE_NE( it, attrs_.end(), platform::errors::NotFound("Attribute %s is not found.", name)); + if (!with_attr_var) { + PADDLE_ENFORCE_EQ( + HasAttrVar(it->second), + false, + platform::errors::NotFound("Attribute %s is not found.", name)); + } return it->second; } @@ -790,6 +842,19 @@ struct SetAttrDescVisitor { void operator()(const std::vector &v) const { VectorToRepeated(v, attr_->mutable_bools()); } + + void operator()(const std::vector &v) const { + std::vector var_names; + for (auto var : v) { + var_names.emplace_back(var->Name()); + } + VectorToRepeated(var_names, attr_->mutable_vars_name()); + } + + void operator()(const VarDesc *desc) const { + attr_->set_var_name(desc->Name()); + } + void operator()(const std::vector &v) const { std::vector blocks_idx; for (auto blk : v) { @@ -866,12 +931,7 @@ void OpDesc::InferShape(const BlockDesc &block) { try { VLOG(3) << "CompileTime infer shape on " << Type(); auto &op_info = OpInfoMap::Instance().Get(this->Type()); - auto *checker = op_info.Checker(); - if (checker != nullptr) { - // set dafault value here - VLOG(10) << "begin to check attribute of " << Type(); - checker->Check(&attrs_); - } + this->CheckAttrs(); auto &infer_shape = op_info.infer_shape_; PADDLE_ENFORCE_EQ( static_cast(infer_shape), @@ -916,15 +976,62 @@ void OpDesc::InferVarType(BlockDesc *block) const { } } +void OpDesc::UpdateVarAttr(const std::string &name, const Attribute &attr) { + auto attr_type = static_cast(attr.index() - 1); + auto type = GetAttrType(name, true); + if (type == proto::AttrType::VAR) { + PADDLE_ENFORCE_EQ( + attr_type, + type, + platform::errors::InvalidArgument( + "Required attr.type == proto::AttrType::VAR, but received %s", + attr_type)); + auto *var_desc = PADDLE_GET_CONST(VarDesc *, attr); + VLOG(3) << "Update AttrVar " << name << " with " << var_desc->Name(); + attrs_[name] = FindVarRecursive(var_desc->Name()); + } else if (type == proto::AttrType::VARS) { + PADDLE_ENFORCE_EQ( + attr_type, + type, + platform::errors::InvalidArgument( + "Required attr.type == proto::AttrType::VARS, but received %s", + attr_type)); + auto vars_desc = PADDLE_GET_CONST(std::vector, attr); + std::vector new_val; + for (auto &var_desc : vars_desc) { + VLOG(3) << "Update AttrVars " << name << " with " << var_desc->Name(); + new_val.emplace_back(FindVarRecursive(var_desc->Name())); + } + attrs_[name] = std::move(new_val); + } +} + +VarDesc *OpDesc::FindVarRecursive(const std::string &name) { + auto *cur_block = block_; + while (cur_block != nullptr && cur_block->ID() >= 0) { + auto *var = block_->FindVar(name); + if (var != nullptr) { + return var; + } + cur_block = cur_block->ParentBlock(); + } + PADDLE_THROW(platform::errors::NotFound( + "Not found Var(%s) from Block(%d) back into global 
Block.", + name, + block_->ID())); +} + CompileTimeInferShapeContext::CompileTimeInferShapeContext( const OpDesc &op, const BlockDesc &block) : op_(op), block_(block) {} bool CompileTimeInferShapeContext::HasInput(const std::string &name) const { - if (op_.Inputs().find(name) == op_.Inputs().end()) { + auto inputs = op_.Inputs(/*with_attr_var=*/true); + if (inputs.find(name) == inputs.end()) { return false; } - const std::vector &input_names = op_.Input(name); + const std::vector &input_names = + op_.Input(name, /*with_attr_var=*/true); auto length = input_names.size(); if (length == 0) { return false; @@ -959,14 +1066,16 @@ bool CompileTimeInferShapeContext::HasOutput(const std::string &name) const { } bool CompileTimeInferShapeContext::HasAttr(const std::string &name) const { - return op_.HasAttr(name); + return op_.HasAttr(name, /*with_attr_var=*/false); } bool CompileTimeInferShapeContext::HasInputs(const std::string &name) const { - if (op_.Inputs().find(name) == op_.Inputs().end()) { + auto inputs = op_.Inputs(/*with_attr_var=*/true); + if (inputs.find(name) == inputs.end()) { return false; } - const std::vector &input_names = op_.Input(name); + const std::vector &input_names = + op_.Input(name, /*with_attr_var=*/true); if (input_names.empty()) { return false; } @@ -1004,7 +1113,7 @@ AttrReader CompileTimeInferShapeContext::Attrs() const { std::vector CompileTimeInferShapeContext::Inputs( const std::string &name) const { - return op_.Input(name); + return op_.Input(name, /*with_attr_var=*/true); } std::vector CompileTimeInferShapeContext::Outputs( @@ -1054,5 +1163,21 @@ proto::VarType::Type CompileTimeInferShapeContext::GetVarType( return block_.FindVarRecursive(name)->GetType(); } +std::vector AttrVarNames(const Attribute &attr) { + std::vector vars_name; + if (IsAttrVar(attr)) { + vars_name.emplace_back(PADDLE_GET_CONST(VarDesc *, attr)->Name()); + } else if (IsAttrVars(attr)) { + for (auto &iter : PADDLE_GET_CONST(std::vector, attr)) { + vars_name.emplace_back(iter->Name()); + } + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported Attribute value type `%s` for AttrVarNames", + platform::demangle(attr.type().name()))); + } + return vars_name; +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h index 02186a02e3d8358de2122af2742518b1d8a9ed92..a1e11cb8707099f1e8ff597da761889384c5784b 100644 --- a/paddle/fluid/framework/op_desc.h +++ b/paddle/fluid/framework/op_desc.h @@ -27,6 +27,7 @@ limitations under the License. 
*/ namespace paddle { namespace framework { +class VarDesc; class BlockDesc; class ProgramDesc; @@ -55,7 +56,10 @@ class OpDesc { const std::vector &Input(const std::string &name) const; - std::vector InputArgumentNames() const; + std::vector Input(const std::string &name, + bool with_attr_var) const; + + std::vector InputArgumentNames(bool with_attr_var = false) const; void SetInput(const std::string ¶m_name, const std::vector &args); @@ -72,24 +76,27 @@ class OpDesc { void RemoveInput(const std::string &name); - bool HasAttr(const std::string &name) const { - return attrs_.find(name) != attrs_.end(); - } + bool HasAttr(const std::string &name, bool with_attr_var = false) const; bool HasProtoAttr(const std::string &name) const; - proto::AttrType GetAttrType(const std::string &name) const; + proto::AttrType GetAttrType(const std::string &name, + bool with_attr_var = false) const; - std::vector AttrNames() const; + std::vector AttrNames(bool with_attr_var = false) const; void SetAttr(const std::string &name, const Attribute &v); void RemoveAttr(const std::string &name); + void SetVarAttr(const std::string &name, VarDesc *var); + + void SetVarsAttr(const std::string &name, std::vector vars); + void SetBlockAttr(const std::string &name, BlockDesc *block); void SetBlocksAttr(const std::string &name, std::vector blocks); - Attribute GetAttr(const std::string &name) const; + Attribute GetAttr(const std::string &name, bool with_attr_var = false) const; template T GetAttrIfExists(const std::string &name) const { @@ -120,11 +127,15 @@ class OpDesc { // Only be used in C++ void SetAttrMap(const AttributeMap &attr_map); - std::vector InputNames() const { return MapKeys(inputs_); } + std::vector InputNames(bool with_attr_var = false) const { + return MapKeys(inputs_); + } std::vector OutputNames() const { return MapKeys(outputs_); } const VariableNameMap &Inputs() const { return inputs_; } + VariableNameMap Inputs(bool with_attr_var) const; + const VariableNameMap &Outputs() const { return outputs_; } VariableNameMap *MutableInputs() { @@ -156,12 +167,18 @@ class OpDesc { const BlockDesc *Block() const { return this->block_; } + void UpdateVarAttr(const std::string &name, const Attribute &attr); + // The Id() and OrignalId() are only used for auto parallel. uint64_t Id() const { return id_; } uint64_t OriginalId() const { return original_id_; } void SetOriginalId(uint64_t original_id) { original_id_ = original_id; } private: + friend class ProgramDesc; + // Find VarDesc from OpDesc located Block into global Block + VarDesc *FindVarRecursive(const std::string &name); + template static std::vector MapKeys(const MapType &map) { std::vector ret_val; @@ -188,6 +205,7 @@ class OpDesc { VariableNameMap inputs_; // output arg name => output variable names VariableNameMap outputs_; + // attribute name => all original attrs AttributeMap attrs_; // need_update_ indicate there some local changes not be synchronized. If @@ -202,5 +220,7 @@ class OpDesc { // current OpDesc is not built from the other one. 
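Editorial note: the with_attr_var flag added to HasAttr, GetAttrType, AttrNames and GetAttr keeps existing callers oblivious to Variable-typed attributes (they stay hidden by default) while letting new code opt in. A sketch of how the corresponding pybind bindings behave, assuming the patched build:

```python
import paddle
from paddle.fluid import core

paddle.enable_static()

prog = paddle.static.Program()
with paddle.static.program_guard(prog):
    x = paddle.randn([8, 8])
    p = paddle.assign([0.5])
    paddle.nn.functional.dropout(x, p=p)

desc = [op for op in prog.global_block().ops if op.type == 'dropout'][0].desc
print(desc.has_attr('dropout_prob'))            # False: hidden by default
print(desc.has_attr('dropout_prob', True))      # True
print(desc.attr_type('dropout_prob', True) == core.AttrType.VAR)  # True
print(desc.attr('dropout_prob', True).name() == p.name)           # True
print('dropout_prob' in desc.attr_names())      # False
print('dropout_prob' in desc.attr_names(True))  # True
```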
uint64_t original_id_ = id_; }; + +std::vector AttrVarNames(const Attribute &attr); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index a2bade9809b21c1843d8243e13aa1ffb4d72977d..b4ef3efb8216c740cbec9141d099e66bb4838c32 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -447,6 +447,13 @@ OperatorBase::OperatorBase(const std::string& type, GenerateTemporaryNames(); CheckAllInputOutputSet(); } + // In OperatorBase level, all attribute with VarDesc type will be considered + // as Input. + for (auto& attr : FilterAttrVar(attrs)) { + VLOG(3) << "found Attribute with Variable type: " << attr.first; + inputs_[attr.first] = std::move(AttrVarNames(attr.second)); + attrs_.erase(attr.first); + } } std::vector OperatorBase::InputVars() const { @@ -2725,6 +2732,8 @@ void OperatorWithKernel::BuildPhiKernelContext( for (size_t i = 0; i < attr_names.size(); ++i) { VLOG(6) << "BuildPhiKernelContext: " << attr_names[i] << ": " << attr_defs[i].type_index; + // attribute with Variable type has been placed into Inputs(), and + // we can parse them from RuntimeContext.inputs. auto attr_iter = Attrs().find(attr_names[i]); switch (attr_defs[i].type_index) { case phi::AttributeType::SCALAR: diff --git a/paddle/fluid/framework/program_desc.cc b/paddle/fluid/framework/program_desc.cc index b184bc8be368138a487cd2192c9354878ea63104..e35df9f59dbb6f1ea2d6950a5c0035b3824f55f8 100644 --- a/paddle/fluid/framework/program_desc.cc +++ b/paddle/fluid/framework/program_desc.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/program_desc.h" +#include #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/version.h" @@ -97,6 +98,23 @@ ProgramDesc::ProgramDesc(const ProgramDesc &o) { block_descs.push_back(MutableBlock(block_id)); } op->SetBlocksAttr(attr_name, block_descs); + } else if (op->GetAttrType(attr_name, true) == proto::AttrType::VAR) { + VarDesc *var_desc = + PADDLE_GET_CONST(VarDesc *, op->GetAttr(attr_name, true)); + op->SetVarAttr(attr_name, + o.Block(block_id).FindVarRecursive(var_desc->Name())); + } else if (op->GetAttrType(attr_name, true) == proto::AttrType::VARS) { + std::vector vars_desc = PADDLE_GET_CONST( + std::vector, op->GetAttr(attr_name, true)); + std::vector new_vars_desc; + std::transform( + vars_desc.begin(), + vars_desc.end(), + std::back_inserter(new_vars_desc), + [&](VarDesc *var_desc) { + return o.Block(block_id).FindVarRecursive(var_desc->Name()); + }); + op->SetVarsAttr(attr_name, new_vars_desc); } } } @@ -129,7 +147,21 @@ void ProgramDesc::InitFromProto() { for (auto &block : blocks_) { for (auto *op : block->AllOps()) { for (const auto &attr : op->Proto()->attrs()) { - if (attr.type() == proto::AttrType::BLOCK) { + if (attr.type() == proto::AttrType::VAR) { + std::string var_name = attr.var_name(); + VLOG(3) << "InitFromProto: SetVarAttr " << attr.name() << " from " + << var_name; + op->SetVarAttr(attr.name(), op->FindVarRecursive(var_name)); + } else if (attr.type() == proto::AttrType::VARS) { + auto vars_name = attr.vars_name(); + std::vector vars_desc; + for (auto &var_name : vars_name) { + VLOG(3) << "InitFromProto: SetVarsAttr " << attr.name() << " from " + << var_name; + vars_desc.emplace_back(op->FindVarRecursive(var_name)); + } + op->SetVarsAttr(attr.name(), vars_desc); + } else if (attr.type() == proto::AttrType::BLOCK) { size_t blk_idx = attr.block_idx(); op->SetBlockAttr(attr.name(), 
this->MutableBlock(blk_idx)); } else if (attr.type() == proto::AttrType::BLOCKS) { diff --git a/paddle/fluid/framework/prune.cc b/paddle/fluid/framework/prune.cc index ede6a99c43678c3ee8fa4697cf388da7ee7c4c94..ceb45a83711ea941fe1a44917b798018fb88a898 100644 --- a/paddle/fluid/framework/prune.cc +++ b/paddle/fluid/framework/prune.cc @@ -180,6 +180,9 @@ void prune_impl(const proto::ProgramDesc& input, std::map* pruned_origin_block_id_map) { auto& block = input.blocks(block_id); auto& ops = block.ops(); + auto add_dependent_var = [&](const std::string& name) { + if (feed_var_names.count(name) == 0) dependent_vars->insert(name); + }; bool expect_feed = true; for (auto& op_desc : ops) { @@ -245,8 +248,17 @@ void prune_impl(const proto::ProgramDesc& input, // For eval / infer mode, there is no optimize op in program. for (auto& var : op_desc.inputs()) { for (auto& argu : var.arguments()) { - if (feed_var_names.count(argu) == 0) { - dependent_vars->insert(argu); + add_dependent_var(argu); + } + } + // NOTE(dev): All attibute with VarDesc type is considered as Input, + // so they shall be added into dependent_vars. + for (auto& attr : op_desc.attrs()) { + if (attr.type() == proto::AttrType::VAR) { + add_dependent_var(attr.var_name()); + } else if (attr.type() == proto::AttrType::VARS) { + for (auto& name : attr.vars_name()) { + add_dependent_var(name); } } } @@ -331,20 +343,30 @@ void prune_impl(const proto::ProgramDesc& input, } std::set var_names; + auto add_var_names = [&](const std::string& name) { + if (var_map.count(name) != 0) var_names.insert(name); + }; for (const auto& op : *op_field) { auto& input_field = op.inputs(); for (auto& input_var : input_field) { for (auto& arg : input_var.arguments()) { - if (var_map.count(arg) != 0) { - var_names.insert(arg); - } + add_var_names(arg); } } auto& output_field = op.outputs(); for (auto& output_var : output_field) { for (auto& arg : output_var.arguments()) { - if (var_map.count(arg) != 0) { - var_names.insert(arg); + add_var_names(arg); + } + } + // NOTE(dev): All attibute with VarDesc type is considered as Input, + // so they shall be added into dependent_vars. 
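Editorial note: InitFromProto() re-binds VAR/VARS attributes from the stored var_name / vars_name strings, and prune_impl now counts them as dependencies, so exported inference programs keep the ops that produce those Variables (the full save/load round trip is covered by test_attribute_var.py below). A sketch of the round trip through the serialized proto, assuming the patched build:

```python
import paddle
from paddle.fluid.framework import Program

paddle.enable_static()

prog = Program()
with paddle.static.program_guard(prog):
    x = paddle.randn([8, 8])
    out = paddle.nn.functional.dropout(x, p=paddle.assign([0.5]))

# Round trip through the serialized proto: InitFromProto() re-binds the
# VAR attribute from the var_name field written by SetAttrDescVisitor.
restored = Program.parse_from_string(prog.desc.serialize_to_string())
print("Var[" in str(restored))  # True
```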
+ for (auto& attr : op.attrs()) { + if (attr.type() == proto::AttrType::VAR) { + add_var_names(attr.var_name()); + } else if (attr.type() == proto::AttrType::VARS) { + for (auto& name : attr.vars_name()) { + add_var_names(name); } } } diff --git a/paddle/fluid/framework/type_defs.h b/paddle/fluid/framework/type_defs.h index 3bcad63f21a841d4be1eeca5360485a6760eb260..31a006914aca7cdeb98e110dada80e4a6a81e180 100644 --- a/paddle/fluid/framework/type_defs.h +++ b/paddle/fluid/framework/type_defs.h @@ -34,6 +34,7 @@ class OperatorBase; class OpDesc; class InferShapeContext; class InferVarTypeContext; +class VarDesc; class BlockDesc; class Variable; class InferNoNeedBufferVarsFN; @@ -55,7 +56,9 @@ using Attribute = paddle::variant, std::vector, - std::vector>; + std::vector, + VarDesc*, + std::vector>; using AttributeMap = std::unordered_map; #ifdef PADDLE_WITH_ASCEND_CL @@ -73,6 +76,8 @@ using NPUAttribute = paddle::variant, std::vector, std::vector, + VarDesc*, + std::vector, std::vector>>; using NPUAttributeMap = std::unordered_map; diff --git a/paddle/fluid/operators/tile_op.cc b/paddle/fluid/operators/tile_op.cc index 1215bf2de3c67ca83adfaa878d7ddb0560691fb5..1d5b57a8a3d53bf50ca74d86971c8387e39af982 100644 --- a/paddle/fluid/operators/tile_op.cc +++ b/paddle/fluid/operators/tile_op.cc @@ -114,42 +114,6 @@ class TileGradOp : public framework::OperatorWithKernel { "TileGrad"); auto x_dims = ctx->GetInputDim("X"); - - std::vector repeat_times = - ctx->Attrs().Get>("repeat_times"); - if (repeat_times.size() == 0) { - repeat_times = std::vector(x_dims.size(), -1); - } - - auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); - auto x_dim_vec = phi::vectorize(x_dims); - if (x_dim_vec.size() > repeat_times.size()) { - auto diff = x_dim_vec.size() - repeat_times.size(); - repeat_times.insert(repeat_times.begin(), diff, -1); - } else { - auto diff = repeat_times.size() - x_dim_vec.size(); - x_dim_vec.insert(x_dim_vec.begin(), diff, -1); - } - - for (size_t i = 0; i < repeat_times.size(); ++i) { - if (repeat_times[i] == -1 || x_dim_vec[i] == -1) { - continue; - } else { - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - x_dim_vec[i] * repeat_times[i], - out_dims[i], - platform::errors::InvalidArgument( - "The size (%d) of the dimension %d of Input(Out@GRAD) should " - "be equal to the multiplication of the crroresponding " - "dimension size of Input(X) (%d) and repeat_times (%d).", - out_dims[i], - i, - x_dim_vec[i], - repeat_times[i])); - } - } - } auto x_grad_name = framework::GradVarName("X"); if (ctx->HasOutput(x_grad_name)) { diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index 4cdf135b8ed59de52a22d2c3e2be934437da44f0..a670813a245a73768745cc5448b727bb26e48b2f 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -289,7 +289,9 @@ void BindOpDesc(pybind11::module *m) { .value("BOOL", pd::proto::AttrType::BOOLEAN) .value("BOOLS", pd::proto::AttrType::BOOLEANS) .value("BLOCK", pd::proto::AttrType::BLOCK) - .value("BLOCKS", pd::proto::AttrType::BLOCKS); + .value("BLOCKS", pd::proto::AttrType::BLOCKS) + .value("VAR", pd::proto::AttrType::VAR) + .value("VARS", pd::proto::AttrType::VARS); pybind11::class_ op_desc(*m, "OpDesc", ""); op_desc @@ -300,8 +302,16 @@ void BindOpDesc(pybind11::module *m) { .def("copy_from", &pd::OpDesc::CopyFrom) .def("type", &pd::OpDesc::Type) .def("set_type", &pd::OpDesc::SetType) - .def("input", &pd::OpDesc::Input) - .def("input_names", &pd::OpDesc::InputNames) + .def("input", + [](pd::OpDesc &self, 
const std::string &name) { + return self.Input(name); + }) + .def( + "input_names", + [](pd::OpDesc &self, bool with_attr_var) { + return self.InputNames(with_attr_var); + }, + py::arg("with_attr_var") = false) .def("output", &pd::OpDesc::Output) .def("output_names", &pd::OpDesc::OutputNames) .def("set_input", @@ -318,16 +328,46 @@ void BindOpDesc(pybind11::module *m) { }) .def("remove_output", &pd::OpDesc::RemoveOutput) .def("remove_input", &pd::OpDesc::RemoveInput) - .def("input_arg_names", &pd::OpDesc::InputArgumentNames) + .def( + "input_arg_names", + [](pd::OpDesc &self, bool with_attr_var) { + return self.InputArgumentNames(with_attr_var); + }, + py::arg("with_attr_var") = false) .def("output_arg_names", &pd::OpDesc::OutputArgumentNames) .def("_rename_input", &pd::OpDesc::RenameInput) .def("_rename_output", &pd::OpDesc::RenameOutput) - .def("has_attr", &pd::OpDesc::HasAttr) - .def("attr_type", &pd::OpDesc::GetAttrType) - .def("attr_names", &pd::OpDesc::AttrNames) + .def( + "has_attr", + [](pd::OpDesc &self, const std::string &name, bool with_attr_var) { + return self.HasAttr(name, with_attr_var); + }, + py::arg("name"), + py::arg("with_attr_var") = false) + .def( + "attr_type", + [](pd::OpDesc &self, const std::string &name, bool with_attr_var) { + return self.GetAttrType(name, with_attr_var); + }, + py::arg("name"), + py::arg("with_attr_var") = false) + .def( + "attr_names", + [](pd::OpDesc &self, bool with_attr_var) { + return self.AttrNames(with_attr_var); + }, + py::arg("with_attr_var") = false) .def("_set_attr", &pd::OpDesc::SetAttr) .def("remove_attr", &pd::OpDesc::RemoveAttr) - .def("attr", &pd::OpDesc::GetAttr) + .def( + "attr", + [](pd::OpDesc &self, const std::string &name, bool with_attr_var) { + return self.GetAttr(name, with_attr_var); + }, + py::arg("name"), + py::arg("with_attr_var") = false) + .def("set_var_attr", &pd::OpDesc::SetVarAttr) + .def("set_vars_attr", &pd::OpDesc::SetVarsAttr) .def("set_block_attr", &pd::OpDesc::SetBlockAttr) .def("set_blocks_attr", &pd::OpDesc::SetBlocksAttr) .def("set_serialized_attr", @@ -351,7 +391,7 @@ void BindOpDesc(pybind11::module *m) { .def("id", &pd::OpDesc::Id) .def("original_id", &pd::OpDesc::OriginalId) .def("set_original_id", &pd::OpDesc::SetOriginalId) - .def("inputs", &pd::OpDesc::Inputs) + .def("inputs", [](pd::OpDesc &self) { return self.Inputs(); }) .def("outputs", &pd::OpDesc::Outputs); } diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml index e90250901dc8d34b390c481fda0c5fe61bc5bdef..b67498bcc1a20c4cf914a1d4ca06d0a7bf500e78 100755 --- a/paddle/phi/api/yaml/legacy_api.yaml +++ b/paddle/phi/api/yaml/legacy_api.yaml @@ -667,7 +667,7 @@ backward : divide_grad - api : dropout - args : (Tensor x, Tensor seed_tensor, float p, bool is_test, str mode, int seed, bool fix_seed) + args : (Tensor x, Tensor seed_tensor, Scalar p, bool is_test, str mode, int seed, bool fix_seed) output : Tensor(out), Tensor(mask) infer_meta : func : DropoutInferMeta diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index b08a42ea3fde3f73ef423907ba1ea470ac1d6c4d..fdf2321ea38e12e27ef07ca4e873da7ed378d173 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -644,8 +644,8 @@ backward : divide_double_grad - backward_api : dropout_grad - forward : dropout (Tensor x, Tensor seed_tensor, float p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(mask) - args : (Tensor mask, Tensor out_grad, float p, bool 
is_test, str mode) + forward : dropout (Tensor x, Tensor seed_tensor, Scalar p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(mask) + args : (Tensor mask, Tensor out_grad, Scalar p, bool is_test, str mode) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta diff --git a/paddle/phi/core/enforce.cc b/paddle/phi/core/enforce.cc index 8074fbeb4918043e121503626eb4120cb13c7c0c..4eb580955a97c9f787159637f522c550eeb4b35c 100644 --- a/paddle/phi/core/enforce.cc +++ b/paddle/phi/core/enforce.cc @@ -27,6 +27,7 @@ class EagerVariable; } namespace paddle { namespace framework { +class VarDesc; class BlockDesc; using Attribute = paddle::variant, std::vector, - std::vector>; + std::vector, + VarDesc*, + std::vector>; using AttributeMap = std::unordered_map; } // namespace framework namespace imperative { diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc index 3cb3e4d6888a4b1f49de67a9f97330dfc955f55e..44e53fc32ccffe7712848520d77e69e540bdb162 100644 --- a/paddle/phi/infermeta/binary.cc +++ b/paddle/phi/infermeta/binary.cc @@ -953,7 +953,7 @@ void DistInferMeta(const MetaTensor& x, void DropoutInferMeta(const MetaTensor& x, const MetaTensor& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -973,7 +973,7 @@ void DropoutInferMeta(const MetaTensor& x, void DropoutNdInferMeta(const MetaTensor& x, const MetaTensor& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, diff --git a/paddle/phi/infermeta/binary.h b/paddle/phi/infermeta/binary.h index c57a702daf8b35069f4a6fd9065a4fdb2b3fd586..7dcbe33e0a9337e886a10ac8b3ffc2c782c3a918 100644 --- a/paddle/phi/infermeta/binary.h +++ b/paddle/phi/infermeta/binary.h @@ -143,7 +143,7 @@ void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out); void DropoutInferMeta(const MetaTensor& x, const MetaTensor& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -153,7 +153,7 @@ void DropoutInferMeta(const MetaTensor& x, void DropoutNdInferMeta(const MetaTensor& x, const MetaTensor& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, diff --git a/paddle/phi/kernels/cpu/dropout_grad_kernel.cc b/paddle/phi/kernels/cpu/dropout_grad_kernel.cc index 42b2834aaffc9ff45230c8a8f4d96b1b6a8eedb6..445e92716a899d86b3147dd715036266444d87b3 100644 --- a/paddle/phi/kernels/cpu/dropout_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/dropout_grad_kernel.cc @@ -24,7 +24,7 @@ template void DropoutNdGradKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, const std::vector& axis, @@ -35,6 +35,7 @@ void DropoutNdGradKernel(const Context& dev_ctx, auto dX = EigenVector::Flatten(*grad_x); auto dY = EigenVector::Flatten(*grad_y); + float prob = p.to(); auto& place = *dev_ctx.eigen_device(); auto& dropout_implementation = mode; @@ -42,20 +43,20 @@ void DropoutNdGradKernel(const Context& dev_ctx, if (dropout_implementation == "upscale_in_train") { dX.device(place) = static_cast(1) * dY; } else { - dX.device(place) = dY * static_cast(1.0f - p); + dX.device(place) = dY * static_cast(1.0f - prob); } } else { std::vector out_dims = phi::vectorize(out_grad.dims()); auto M = EigenVector::Flatten(mask); if (dropout_implementation == "upscale_in_train") { - if (p == 1.0f) { + if (prob == 1.0f) { dX.device(place) = static_cast(0) * dY; } else { if (axis.empty()) { - 
dX.device(place) = dY * M.cast() / static_cast(1.0f - p); + dX.device(place) = dY * M.cast() / static_cast(1.0f - prob); } else { - dX.device(place) = - dY * M.broadcast(out_dims).cast() / static_cast(1.0f - p); + dX.device(place) = dY * M.broadcast(out_dims).cast() / + static_cast(1.0f - prob); } } } else { @@ -72,12 +73,12 @@ template void DropoutGradRawKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, DenseTensor* x_grad) { DropoutNdGradKernel( - dev_ctx, mask, out_grad, p, is_test, mode, {}, x_grad); + dev_ctx, mask, out_grad, p.to(), is_test, mode, {}, x_grad); } } // namespace phi diff --git a/paddle/phi/kernels/cpu/dropout_kernel.cc b/paddle/phi/kernels/cpu/dropout_kernel.cc index d3ca21cfe33b9d1006d86f93c67800c97ed393dd..41c33fcf5dd3ff86b610c074874a815ea4eaa381 100644 --- a/paddle/phi/kernels/cpu/dropout_kernel.cc +++ b/paddle/phi/kernels/cpu/dropout_kernel.cc @@ -25,7 +25,7 @@ namespace phi { template void ComputeDropoutInference(const Context& ctx, const DenseTensor& x, - float dropout_prob, + const Scalar& dropout_prob, bool upscale_in_train, DenseTensor* y) { if (upscale_in_train) { @@ -41,7 +41,7 @@ void ComputeDropoutInference(const Context& ctx, auto X = EigenMatrix::Reshape(x, 1); auto Y = EigenMatrix::Reshape(*y, 1); auto& place = *ctx.eigen_device(); - Y.device(place) = X * static_cast(1.0f - dropout_prob); + Y.device(place) = X * static_cast(1.0f - dropout_prob.to()); } } @@ -49,7 +49,7 @@ template void DropoutRawKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -59,7 +59,7 @@ void DropoutRawKernel(const Context& dev_ctx, auto* y = out; const auto* x_data = x.data(); T* y_data = dev_ctx.template Alloc(y); - float dropout_prob = p; + float dropout_prob = p.to(); auto& dropout_implementation = mode; bool upscale_in_train = (dropout_implementation == "upscale_in_train"); @@ -109,7 +109,7 @@ template void DropoutNdKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -120,7 +120,7 @@ void DropoutNdKernel(const Context& dev_ctx, auto* y = out; const auto* x_data = x.data(); T* y_data = dev_ctx.template Alloc(y); - float dropout_prob = p; + float dropout_prob = p.to(); auto& dropout_implementation = mode; bool upscale_in_train = (dropout_implementation == "upscale_in_train"); diff --git a/paddle/phi/kernels/dropout_grad_kernel.h b/paddle/phi/kernels/dropout_grad_kernel.h index d8d5363ad59b7298d7b4216204dc3c433152e34a..c61e4d0b8598dc2fc2c21edbea6d72c3f97ea2e7 100644 --- a/paddle/phi/kernels/dropout_grad_kernel.h +++ b/paddle/phi/kernels/dropout_grad_kernel.h @@ -23,7 +23,7 @@ template void DropoutGradRawKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, DenseTensor* x_grad); @@ -32,7 +32,7 @@ template void DropoutNdGradKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, const std::vector& axis, diff --git a/paddle/phi/kernels/dropout_kernel.h b/paddle/phi/kernels/dropout_kernel.h index cba8160058e9905fd759b8d83027edce97dc332b..ff718d641bedcedcee30dc1515476fe609d3d001 100644 --- a/paddle/phi/kernels/dropout_kernel.h +++ 
b/paddle/phi/kernels/dropout_kernel.h @@ -24,7 +24,7 @@ template void DropoutRawKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -36,7 +36,7 @@ template void DropoutNdKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, diff --git a/paddle/phi/kernels/gpu/dropout_grad_kernel.cu b/paddle/phi/kernels/gpu/dropout_grad_kernel.cu index 841d98fbc003e547f5a11057b70b7dd6be6bc019..4aa59cded8f379815b6eaed32b2e6de85a48bdc3 100644 --- a/paddle/phi/kernels/gpu/dropout_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/dropout_grad_kernel.cu @@ -24,29 +24,41 @@ template void DropoutGradRawKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, DenseTensor* x_grad) { bool upscale_in_train = (mode == "upscale_in_train"); x_grad->mutable_data(dev_ctx.GetPlace()); - paddle::operators::DropoutGradGPUKernelDriver( - dev_ctx, is_test, p, upscale_in_train, out_grad, mask, x_grad, false); + paddle::operators::DropoutGradGPUKernelDriver(dev_ctx, + is_test, + p.to(), + upscale_in_train, + out_grad, + mask, + x_grad, + false); } template void DropoutNdGradKernel(const Context& dev_ctx, const DenseTensor& mask, const DenseTensor& out_grad, - float p, + const Scalar& p, bool is_test, const std::string& mode, const std::vector& axis, DenseTensor* x_grad) { bool upscale_in_train = (mode == "upscale_in_train"); dev_ctx.template Alloc(x_grad); - paddle::operators::DropoutGradGPUKernelDriver( - dev_ctx, is_test, p, upscale_in_train, out_grad, mask, x_grad, true); + paddle::operators::DropoutGradGPUKernelDriver(dev_ctx, + is_test, + p.to(), + upscale_in_train, + out_grad, + mask, + x_grad, + true); } } // namespace phi diff --git a/paddle/phi/kernels/gpu/dropout_kernel.cu b/paddle/phi/kernels/gpu/dropout_kernel.cu index f973bb8e15fc75b19e98d8a8116f380699119fe9..0f2a8d9c938484f8bd3e75de5907faadd8116eec 100644 --- a/paddle/phi/kernels/gpu/dropout_kernel.cu +++ b/paddle/phi/kernels/gpu/dropout_kernel.cu @@ -24,7 +24,7 @@ template void DropoutRawKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -36,7 +36,7 @@ void DropoutRawKernel(const Context& dev_ctx, mask->mutable_data(dev_ctx.GetPlace()); paddle::operators::DropoutFwGPUKernelDriver(dev_ctx, is_test, - p, + p.to(), upscale_in_train, fix_seed, seed, @@ -51,7 +51,7 @@ template void DropoutNdKernel(const Context& dev_ctx, const DenseTensor& x, const paddle::optional& seed_tensor, - float p, + const Scalar& p, bool is_test, const std::string& mode, int seed, @@ -64,7 +64,7 @@ void DropoutNdKernel(const Context& dev_ctx, dev_ctx.template Alloc(mask); paddle::operators::DropoutFwGPUKernelDriver(dev_ctx, is_test, - p, + p.to(), upscale_in_train, fix_seed, seed, diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py index fdbf0312db664e54dd4711df6f91a1796887aeff..55c6a3308b8c12b972d2987a29fc649b75e87bc9 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py @@ -18,7 +18,7 @@ import numpy as np from paddle import _C_ops from 
paddle.fluid import core from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle.fluid.framework import _non_static_mode, default_main_program +from paddle.fluid.framework import _non_static_mode, default_main_program, Variable from paddle.fluid.layer_helper import LayerHelper __all__ = [] @@ -187,11 +187,12 @@ def dropout(x, if rng_name is None: return paddle.nn.functional.dropout(x, p, axis, training, mode, name) + if not isinstance(p, (float, int, Variable)): + raise TypeError("p argument should be a number(int|float) or Variable") + # fast return for p == 0 - if p == 0: return x + if isinstance(p, (int, float)) and p == 0: return x - assert isinstance(p, (float, int)), \ - TypeError("p argument should be a number") assert 0 <= p <= 1, ValueError("p argument should between 0 and 1") assert mode in ('downscale_in_infer', 'upscale_in_train'), \ ValueError( @@ -211,6 +212,11 @@ def dropout(x, seed = determinate_seed(rng_name) + if isinstance(p, Variable) and not p.shape != [1]: + raise TypeError( + "Required p.shape == [1] if type(p) is Variable, but received p.shape = {}" + .format(p.shape)) + helper = LayerHelper('dropout', **locals()) check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'dropout') diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 4ce4801d32bd6f7eba256cc41e8a98594b5ae2b2..8db5a4353aeebe386e2520a1e6a7b773d3320d47 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -1262,6 +1262,17 @@ def _varbase_creator(type=core.VarDesc.VarType.LOD_TENSOR, True if persistable else False) +def _all_is_type(vals, expected_type): + """ + Return True if type of each element is expected_type. + + NOTE: BuiltIn all() will always return True if vals is empty. + """ + assert isinstance(vals, (list, tuple)) + if not vals: return False + return all(isinstance(v, expected_type) for v in vals) + + class VariableMetaClass(type): @classmethod @@ -2934,7 +2945,28 @@ class Operator(object): if skip_op_callstack and name == "op_callstack": continue - attr_type = self.desc.attr_type(name) + attr_type = self.desc.attr_type(name, True) + if attr_type == core.AttrType.VAR: + attr_var_name = self.desc.attr(name, True).name() + a = "{name} = Var['{value}']".format(name=name, + type=attr_type, + value=attr_var_name) + attrs_str += a + if i != len(attr_names) - 1: + attrs_str += ", " + continue + + if attr_type == core.AttrType.VARS: + attr_var_names = [ + "'%s'" % var.name() for var in self.desc.attr(name, True) + ] + a = "{name} = Vars[{value}]".format( + name=name, type=attr_type, value=','.join(attr_var_names)) + attrs_str += a + if i != len(attr_names) - 1: + attrs_str += ", " + continue + if attr_type == core.AttrType.BLOCK: a = "{name} = block[{value}]".format( name=name, type=attr_type, value=self._block_attr_id(name)) @@ -3128,10 +3160,13 @@ class Operator(object): Raises: ValueError: If the type of value doesn't match with desc.attr_type(name). 
""" - if isinstance(val, Block): + if isinstance(val, Variable): + self.desc.set_var_attr(name, val.desc) + elif isinstance(val, list) and _all_is_type(val, Variable): + self.desc.set_vars_attr(name, [v.desc for v in val]) + elif isinstance(val, Block): self.desc.set_block_attr(name, val.desc) - elif isinstance(val, list) and val and all( - isinstance(v, Block) for v in val): + elif isinstance(val, list) and val and _all_is_type(val, Block): self.desc.set_blocks_attr(name, [v.desc for v in val]) elif isinstance(val, core.BlockDesc) or \ isinstance(val, core.ProgramDesc): @@ -3141,7 +3176,7 @@ class Operator(object): @property def attr_names(self): - return self.desc.attr_names() + return self.desc.attr_names(True) def attr(self, name): """ @@ -4392,10 +4427,13 @@ class IrOpNode(IrNode): assert self.node.op() is not None, \ "The node operator description can not be None." desc = self.node.op() - if isinstance(val, Block): + if isinstance(val, Variable): + desc.set_var_attr(name, val.desc) + elif isinstance(val, list) and _all_is_type(val, Variable): + desc.set_vars_attr(name, [v.desc for v in val]) + elif isinstance(val, Block): desc.set_block_attr(name, val.desc) - elif isinstance(val, list) and val and \ - all(isinstance(v, Block) for v in val): + elif isinstance(val, list) and val and _all_is_type(val, Block): desc.set_blocks_attr(name, [v.desc for v in val]) elif isinstance(val, core.BlockDesc) or \ isinstance(val, core.ProgramDesc): @@ -4850,10 +4888,13 @@ class IrGraph(object): """ Update the value of desc's attribute by attribute's name. """ - if isinstance(val, Block): + if isinstance(val, Variable): + desc.set_var_attr(name, val.desc) + elif isinstance(val, list) and _all_is_type(val, Variable): + desc.set_vars_attr(name, [v.desc for v in val]) + elif isinstance(val, Block): desc.set_block_attr(name, val.desc) - elif isinstance(val, list) and val and all( - isinstance(v, Block) for v in val): + elif isinstance(val, list) and val and _all_is_type(val, Block): desc.set_blocks_attr(name, [v.desc for v in val]) elif isinstance(val, core.BlockDesc) or \ isinstance(val, core.ProgramDesc): diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 5b4fe640d01fc8080fd391be4f981f0a1a60b44b..3ce7c453613b54687380fb9148d3c809aa3f7c26 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1132,8 +1132,11 @@ def dropout(x, x = fluid.data(name="data", shape=[None, 32, 32], dtype="float32") dropped = fluid.layers.dropout(x, dropout_prob=0.5) """ + if not isinstance(dropout_prob, (float, int, Variable)): + raise TypeError( + "dropout_prob argument should be a number(int|float) or Variable") # fast return for p == 0 - if dropout_prob == 0: + if isinstance(dropout_prob, (int, float)) and dropout_prob == 0: return x if _non_static_mode(): @@ -1152,6 +1155,10 @@ def dropout(x, def get_attrs(prog, dropout_prob, is_test, seed): if (seed is None or seed == 0) and prog.random_seed != 0: seed = prog.random_seed + if isinstance(dropout_prob, Variable) and not dropout_prob.shape != [1]: + raise TypeError( + "Required dropout_prob.shape == [1] if type(dropout_prob) is Variable, but received dropout_prob.shape = {}" + .format(dropout_prob.shape)) attrs = { 'dropout_prob': dropout_prob, 'is_test': is_test, diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 6862ac1fd66346ffca7ad0189261b85c65300723..dbbc207fba4014d4088542883a07bd8070a92c6b 100644 --- a/python/paddle/fluid/layers/tensor.py +++ 
b/python/paddle/fluid/layers/tensor.py @@ -390,9 +390,7 @@ def concat(input, axis=0, name=None): attrs = {} if isinstance(axis, Variable): axis.stop_gradient = True - inputs['AxisTensor'] = axis - else: - attrs['axis'] = axis + attrs['axis'] = axis helper.append_op(type='concat', inputs=inputs, diff --git a/python/paddle/fluid/tests/unittests/test_attribute_var.py b/python/paddle/fluid/tests/unittests/test_attribute_var.py new file mode 100644 index 0000000000000000000000000000000000000000..a79a8d400a36339bd6c58279d5def541bea648dd --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_attribute_var.py @@ -0,0 +1,158 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest +import tempfile +import paddle +import paddle.inference as paddle_infer +from paddle.fluid.framework import program_guard, Program +import numpy as np + +paddle.enable_static() + + +class UnittestBase(unittest.TestCase): + + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + self.init_info() + + def tearDwon(self): + self.temp_dir.cleanup() + + def init_info(self): + self.shapes = None + self.save_path = None + + def infer_prog(self): + config = paddle_infer.Config(self.save_path + '.pdmodel', + self.save_path + '.pdiparams') + predictor = paddle_infer.create_predictor(config) + input_names = predictor.get_input_names() + for i, shape in enumerate(self.shapes): + input_handle = predictor.get_input_handle(input_names[i]) + fake_input = np.random.randn(*shape).astype("float32") + input_handle.reshape(shape) + input_handle.copy_from_cpu(fake_input) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output_data = output_handle.copy_to_cpu() + + return output_data + + +class TestDropout(UnittestBase): + + def init_info(self): + self.shapes = [[10, 10]] + self.save_path = os.path.join(self.temp_dir.name, 'dropout') + + def test_static(self): + main_prog = Program() + starup_prog = Program() + with program_guard(main_prog, starup_prog): + fc = paddle.nn.Linear(10, 10) + x = paddle.randn(self.shapes[0]) + x.stop_gradient = False + feat = fc(x) + # p is a Variable + p = paddle.randn([1]) + out = paddle.nn.functional.dropout(feat, p=p) + sgd = paddle.optimizer.SGD() + sgd.minimize(paddle.mean(out)) + # test _to_string + self.assertTrue("Var[" in str(main_prog)) + + exe = paddle.static.Executor() + exe.run(starup_prog) + res = exe.run(fetch_list=[x, out]) + # export model + paddle.static.save_inference_model(self.save_path, [x], [out], exe) + + # Test for Inference Predictor + infer_out = self.infer_prog() + self.assertEqual(infer_out.shape, (10, 10)) + + +class TestTileTensorList(UnittestBase): + + def init_info(self): + self.shapes = [[2, 3, 4]] + self.save_path = os.path.join(self.temp_dir.name, 'tile_tensors') + + def test_static(self): + main_prog = Program() + starup_prog = Program() + with program_guard(main_prog, starup_prog): + fc = 
paddle.nn.Linear(4, 10) + x = paddle.randn([2, 3, 4]) + x.stop_gradient = False + feat = fc(x) + shape0 = paddle.full([1], 1, dtype='int32') + shape1 = paddle.full([1], 2, dtype='int32') + shape = [3, shape1, shape0] + out = paddle.tile(feat, shape) + + sgd = paddle.optimizer.SGD() + sgd.minimize(paddle.mean(out)) + self.assertTrue("Vars[" in str(main_prog)) + + exe = paddle.static.Executor() + exe.run(starup_prog) + res = exe.run(fetch_list=[x, out]) + self.assertEqual(res[1].shape, (6, 6, 10)) + + paddle.static.save_inference_model(self.save_path, [x], [out], exe) + # Test for Inference Predictor + infer_out = self.infer_prog() + self.assertEqual(infer_out.shape, (6, 6, 10)) + + +class TestTileTensor(UnittestBase): + + def init_info(self): + self.shapes = [[2, 3, 4]] + self.save_path = os.path.join(self.temp_dir.name, 'tile_tensor') + + def test_static(self): + main_prog = Program() + starup_prog = Program() + with program_guard(main_prog, starup_prog): + fc = paddle.nn.Linear(4, 10) + x = paddle.randn([2, 3, 4]) + x.stop_gradient = False + feat = fc(x) + # shape is a Variable + shape = paddle.assign([3, 2, 1]) + out = paddle.tile(feat, shape) + + sgd = paddle.optimizer.SGD() + sgd.minimize(paddle.mean(out)) + self.assertTrue("Var[" in str(main_prog)) + + exe = paddle.static.Executor() + exe.run(starup_prog) + res = exe.run(fetch_list=[x, out]) + self.assertEqual(res[1].shape, (6, 6, 10)) + + paddle.static.save_inference_model(self.save_path, [x], [out], exe) + # Test for Inference Predictor + infer_out = self.infer_prog() + self.assertEqual(infer_out.shape, (6, 6, 10)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py index 33992b1881ec4363e8e9f9f056e37697b6d0a3cd..30c4201a0b943c0960254d343829e5ce9811a958 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py @@ -1103,6 +1103,47 @@ class TestDropoutBackward(unittest.TestCase): self.cal_grad_upscale_train(mask.numpy(), prob))) +class TestDropOutWithProbTensor(unittest.TestCase): + + def setUp(self): + shapes = [[10, 10], [10, 10, 10], [10, 10, 10, 10]] + self.inputs = [ + np.random.random(shape).astype("float32") for shape in shapes + ] + self.place = paddle.CUDAPlace( + 0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() + + def api_case(self, x): + p = paddle.assign([0.5]) + out = paddle.nn.functional.dropout(x=x, p=p, training=True) + return out + + def run_static(self, x): + paddle.seed(2022) + main_program = Program() + + with program_guard(main_program): + input = paddle.static.data(shape=x.shape, name='x', dtype='float32') + out = self.api_case(input) + + exe = paddle.static.Executor(self.place) + res = exe.run(feed={'x': x}, fetch_list=[out]) + + return res[0] + + def run_dygraph(self, x): + paddle.seed(2022) + with fluid.dygraph.guard(self.place): + out = self.api_case(paddle.to_tensor(x)) + return out + + def test_p_tensor(self): + for x in self.inputs: + static_res = self.run_static(x) + dygraph_res = self.run_dygraph(x) + self.assertTrue(np.array_equal(static_res, dygraph_res)) + + class TestRandomValue(unittest.TestCase): def test_fixed_random_number(self): diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index ba75fb23b8b18b344c486e7f739f008a19c8a9e8..a7fad9a7c81e34a2acc92f265846d339d3e81105 100644 --- a/python/paddle/nn/functional/common.py +++ 
b/python/paddle/nn/functional/common.py @@ -1004,14 +1004,14 @@ def dropout(x, print(y_01) """ - # fast return for p == 0 - if p == 0: - return x - - if not isinstance(p, (float, int)): - raise TypeError("p argument should be a number") - if p < 0 or p > 1: - raise ValueError("p argument should between 0 and 1") + if not isinstance(p, (float, int, Variable)): + raise TypeError("p argument should be a number or Variable") + + if isinstance(p, (int, float)): + # fast return for p == 0 + if p == 0: return x + elif p < 0 or p > 1: + raise ValueError("p argument should between 0 and 1") if mode not in ('downscale_in_infer', 'upscale_in_train'): raise ValueError( "mode argument should be 'downscale_in_infer' or 'upscale_in_train'" @@ -1050,6 +1050,12 @@ def dropout(x, def get_attrs(prog, dropout_prob, is_test, seed): if (seed is None or seed == 0) and prog.random_seed != 0: seed = prog.random_seed + + if isinstance(dropout_prob, + Variable) and not dropout_prob.shape != [1]: + raise TypeError( + "Required p.shape == [1] if type(p) is Variable, but received p.shape = {}" + .format(p.shape)) attrs = { 'dropout_prob': dropout_prob, 'is_test': is_test, diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 0b1a22865d506ae37079d0ff469348a51322e8af..05dc8035c5d59e4090f082b4c4005f234d16cf21 100755 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -1117,9 +1117,7 @@ def concat(x, axis=0, name=None): attrs = {} if isinstance(axis, Variable): axis.stop_gradient = True - inputs['AxisTensor'] = axis - else: - attrs['axis'] = axis + attrs['axis'] = axis helper.append_op(type='concat', inputs=inputs, @@ -2937,13 +2935,11 @@ def tile(x, repeat_times, name=None): if isinstance(repeat_times, Variable): repeat_times.stop_gradient = True - inputs['RepeatTimes'] = repeat_times - attrs['repeat_times'] = [-1] + attrs['repeat_times'] = repeat_times elif isinstance(repeat_times, (list, tuple)): attrs['repeat_times'] = get_attr_repeat_times(repeat_times) if utils._contain_var(repeat_times): - inputs['repeat_times_tensor'] = utils._convert_to_tensor_list( - repeat_times) + attrs['repeat_times'] = utils._convert_to_tensor_list(repeat_times) dtype = helper.input_dtype(input_param_name='x') out = helper.create_variable_for_type_inference(dtype)
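Editorial note: with repeat_times now carried as a VAR/VARS attribute instead of the repeat_times_tensor input, paddle.tile accepts a list that mixes Python ints and 1-element Tensors in static graph mode. A sketch mirroring TestTileTensorList above, assuming the patched build:

```python
import paddle

paddle.enable_static()

prog = paddle.static.Program()
with paddle.static.program_guard(prog):
    x = paddle.randn([2, 3, 4])
    # Mixing Python ints and 1-element Tensors: the whole list is stored in
    # the repeat_times attribute as VARS instead of a repeat_times_tensor input.
    repeat_times = [3, paddle.full([1], 2, dtype='int32'), paddle.full([1], 1, dtype='int32')]
    out = paddle.tile(x, repeat_times)

exe = paddle.static.Executor()
res, = exe.run(prog, fetch_list=[out])
print(res.shape)  # (6, 6, 4)
```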
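Editorial note: the probability-as-Tensor call also works in dynamic graph mode, where p reaches the phi kernels as a Scalar (the p.to<float>() conversions in the dropout kernels above). A sketch mirroring TestDropOutWithProbTensor, assuming the patched build and the default (dynamic graph) mode:

```python
import paddle

# Dynamic graph (eager) mode: the Tensor p is converted to a Scalar attribute.
paddle.seed(2022)
x = paddle.rand([4, 4])
p = paddle.to_tensor([0.3])
y = paddle.nn.functional.dropout(x, p=p)
print(y.shape)  # [4, 4]
```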