From fce6466217fd11baf70a2e63f2902ec767810967 Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Wed, 15 Jul 2020 11:50:10 +0800 Subject: [PATCH] fix some errmsg report, in framework/ir/ subdir(memory,optimizer,multi_device) (#25460) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix paddle/fluid/framework/ir/multi_devices_graph_pass/ error msg reoprt, test=develop * fix paddle/fluid/framework/ir/memory_optimize_pass/ error msg reoprt, test=develop * fix paddle/fluid/framework/ir/fuse_optimizer_ops_pass/ error msg reoprt, test=develop * fix paddle/fluid/framework/ir/memory_optimize_pass/ error msg reoprt about PADDLE_ENFORCE, test=develop * modify error msg reoprt,about errortype,grammar. test=develop * modify error msg reoprt,about PADDLE_ENFORCE to PADDLE_ENFORCE_XXX, test=develop * modify error msg reoprt,about PADDLE_ENFORCE to PADDLE_ENFORCE_XXX, and %s to %d, test=develop * modified some error descriptions, test=develop --- .../fuse_adam_op_pass.cc | 123 +++++++++++++++--- .../fuse_momentum_op_pass.cc | 28 +++- .../fuse_optimizer_op_pass.cc | 91 ++++++++----- .../fuse_sgd_op_pass.cc | 4 +- ...uffer_shared_cross_op_memory_reuse_pass.cc | 52 ++++++-- .../buffer_shared_inplace_op_pass.cc | 4 +- .../eager_deletion_pass.cc | 8 +- .../memory_optimization_var_info.h | 6 +- .../memory_optimize_pass/memory_reuse_pass.cc | 55 +++++--- .../ir/memory_optimize_pass/op_graph_view.cc | 8 +- .../ir/memory_optimize_pass/op_graph_view.h | 10 +- .../reference_count_pass.cc | 42 +++--- .../all_reduce_deps_pass.cc | 44 ++++--- .../backward_optimizer_op_deps_pass.cc | 7 +- .../fuse_all_reduce_op_pass.cc | 40 ++++-- .../multi_devices_graph_check_pass.cc | 5 +- .../multi_devices_graph_pass.cc | 116 +++++++++++++---- .../multi_devices_graph_print_pass.cc | 5 +- .../sequential_execution_pass.cc | 26 ++-- 19 files changed, 495 insertions(+), 179 deletions(-) diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc index 482d8cf3d2..c284c1f458 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc @@ -50,18 +50,25 @@ class FuseAdamOpPass : public FuseOptimizerOpPass { fused_scale2->inputs.end()); for (auto &out_node : fused_scale1->outputs) { if (fused_scale2_in_nodes.count(out_node)) { - PADDLE_ENFORCE(out_node->IsCtrlVar(), - "The dependency var only should be ctrl var."); + PADDLE_ENFORCE_EQ(out_node->IsCtrlVar(), true, + platform::errors::PreconditionNotMet( + "In adam op pass, the dependency var(%s) only " + "should be ctrl var.", + out_node->Name())); not_need_ctrl_var_nodes.insert(out_node); } } for (auto &node : not_need_ctrl_var_nodes) { // remove this node from the input op node. 
- PADDLE_ENFORCE(!node->inputs.empty(), - "The input should not be empty here."); + PADDLE_ENFORCE_EQ( + node->inputs.empty(), false, + platform::errors::PreconditionNotMet( + "Node(%s)'s input should not be empty here.", node->Name())); auto op_node = node->inputs.front(); - PADDLE_ENFORCE(op_node->IsOp()); + PADDLE_ENFORCE_EQ(op_node->IsOp(), true, + platform::errors::PreconditionNotMet( + "Node(%s) should be an OP node.", op_node->Name())); op_node->outputs.erase( remove_if( op_node->outputs.begin(), op_node->outputs.end(), @@ -85,7 +92,9 @@ class FuseAdamOpPass : public FuseOptimizerOpPass { const std::unordered_map> &vars_set, const std::unordered_map &fused_vars_name, const std::vector &adam_ops, ir::Graph *graph) const { - PADDLE_ENFORCE_GT(adam_ops.size(), static_cast(0)); + PADDLE_ENFORCE_GT( + adam_ops.size(), static_cast(0), + platform::errors::InvalidArgument("No adam op in the graph.")); // Check attributions // NOTE: If new attribution is added, the following code maybe need change. @@ -102,22 +111,58 @@ class FuseAdamOpPass : public FuseOptimizerOpPass { int64_t, adam_ops[0]->Op()->GetAttr("min_row_size_to_use_multithread")); for (auto &adam_op : adam_ops) { PADDLE_ENFORCE_EQ( - beta1, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta1"))); + beta1, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta1")), + platform::errors::PreconditionNotMet( + "All adam Op's attr(beta1) must be same, but there are two " + "different " + "value: %f, %f.", + beta1, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta1")))); PADDLE_ENFORCE_EQ( - beta2, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta2"))); + beta2, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta2")), + platform::errors::PreconditionNotMet( + "All adam Op's attr(beta2) must be same, but there are two " + "different " + "value: %f, %f.", + beta2, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta2")))); PADDLE_ENFORCE_EQ( - epsilon, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("epsilon"))); + epsilon, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("epsilon")), + platform::errors::PreconditionNotMet( + "All adam Op's attr(epsilon) must be same, but there are two " + "different " + "value: %f, %f.", + epsilon, + BOOST_GET_CONST(float, adam_op->Op()->GetAttr("epsilon")))); PADDLE_ENFORCE_EQ( - lazy_mode, - BOOST_GET_CONST(bool, adam_op->Op()->GetAttr("lazy_mode"))); + lazy_mode, BOOST_GET_CONST(bool, adam_op->Op()->GetAttr("lazy_mode")), + platform::errors::PreconditionNotMet( + "All adam Op's attr(lazy_mode) must be same, but there are two " + "different " + "value: %d, %d.", + lazy_mode, + BOOST_GET_CONST(bool, adam_op->Op()->GetAttr("lazy_mode")))); PADDLE_ENFORCE_EQ( min_row_size_to_use_multithread, BOOST_GET_CONST(int64_t, adam_op->Op()->GetAttr( - "min_row_size_to_use_multithread"))); + "min_row_size_to_use_multithread")), + platform::errors::PreconditionNotMet( + "All adam Op's attr(min_row_size_to_use_multithread) must be " + "same, but there are two different value: %I64, %I64.", + min_row_size_to_use_multithread, + BOOST_GET_CONST( + int64_t, + adam_op->Op()->GetAttr("min_row_size_to_use_multithread")))); PADDLE_ENFORCE_EQ( op_role, BOOST_GET_CONST(int, adam_op->Op()->GetAttr( - OpProtoAndCheckerMaker::OpRoleAttrName()))); + OpProtoAndCheckerMaker::OpRoleAttrName())), + platform::errors::PreconditionNotMet( + "All adam Op's attr(op_role) must be same, but there are two " + "different " + "value: %d, %d.", + op_role, + BOOST_GET_CONST(int, + adam_op->Op()->GetAttr( + OpProtoAndCheckerMaker::OpRoleAttrName())))); 
} // NOTE: fused_var is only exist in scope, so the graph doesn't have @@ -154,7 +199,10 @@ class FuseAdamOpPass : public FuseOptimizerOpPass { const std::string &fused_var_name, const std::vector &adam_ops, ir::Graph *graph) const { - PADDLE_ENFORCE_EQ(beta_name.size(), adam_ops.size()); + PADDLE_ENFORCE_EQ(beta_name.size(), adam_ops.size(), + platform::errors::InvalidArgument( + "Beta name size(%d) must equal to adam op size(%d).", + beta_name.size(), adam_ops.size())); const std::string scale_op_name = "scale"; // Get the scale_ops of dealing the adam's beta var. @@ -168,7 +216,9 @@ class FuseAdamOpPass : public FuseOptimizerOpPass { return var_node->Var() && var_node->Var()->Name() == beta_1_pow_name; }); - PADDLE_ENFORCE(beta_pow_iter != adam_ops[i]->inputs.end()); + PADDLE_ENFORCE_NE(beta_pow_iter, adam_ops[i]->inputs.end(), + platform::errors::NotFound( + "Can not find %s in adam ops.", beta_1_pow_name)); auto beta_pow_node = *beta_pow_iter; auto scale_op_iter = std::find_if( @@ -176,11 +226,18 @@ class FuseAdamOpPass : public FuseOptimizerOpPass { [&scale_op_name](ir::Node *op_node) -> bool { return op_node->Op() && op_node->Op()->Type() == scale_op_name; }); - PADDLE_ENFORCE(scale_op_iter != beta_pow_node->outputs.end()); + PADDLE_ENFORCE_NE( + scale_op_iter, beta_pow_node->outputs.end(), + platform::errors::NotFound("Can not find %s in beta pow node.", + scale_op_name)); scale_ops.emplace_back(*scale_op_iter); } - PADDLE_ENFORCE_EQ(scale_ops.size(), beta_name.size()); + PADDLE_ENFORCE_EQ( + scale_ops.size(), beta_name.size(), + platform::errors::PreconditionNotMet( + "Beta name size(%d) must equal to scale ops size(%d).", + beta_name.size(), scale_ops.size())); VLOG(6) << "The number of scale op is " << scale_ops.size() << "."; // Check attributions // NOTE: If new attribution is added, the following code maybe need change. 
@@ -193,16 +250,40 @@ class FuseAdamOpPass : public FuseOptimizerOpPass { BOOST_GET_CONST(bool, scale_ops[0]->Op()->GetAttr("bias_after_scale")); for (auto &scale_op : scale_ops) { PADDLE_ENFORCE_EQ( - scale, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("scale"))); + scale, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("scale")), + platform::errors::PreconditionNotMet( + "All scale Op's attr(scale) must be same, but there are two " + "different " + "value: %f, %f.", + scale, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("scale")))); PADDLE_ENFORCE_EQ( - bias, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("bias"))); + bias, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("bias")), + platform::errors::PreconditionNotMet( + "All scale Op's attr(bias) must be same, but there are two " + "different " + "value: %f, %f.", + bias, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("bias")))); PADDLE_ENFORCE_EQ( bias_after_scale, - BOOST_GET_CONST(bool, scale_op->Op()->GetAttr("bias_after_scale"))); + BOOST_GET_CONST(bool, scale_op->Op()->GetAttr("bias_after_scale")), + platform::errors::PreconditionNotMet( + "All scale Op's attr(bias_after_scale) must be same, but there " + "are two different value: %d, %d.", + bias_after_scale, + BOOST_GET_CONST(bool, + scale_op->Op()->GetAttr("bias_after_scale")))); PADDLE_ENFORCE_EQ( op_role, BOOST_GET_CONST(int, scale_op->Op()->GetAttr( - OpProtoAndCheckerMaker::OpRoleAttrName()))); + OpProtoAndCheckerMaker::OpRoleAttrName())), + platform::errors::PreconditionNotMet( + "All scale Op's attr(op_role) must be same, but there are two " + "different " + "value: %d, %d.", + op_role, + BOOST_GET_CONST(int, + scale_op->Op()->GetAttr( + OpProtoAndCheckerMaker::OpRoleAttrName())))); } // NOTE: fused_var is only exist in scope, so the graph doesn't have diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc index f70745be1b..43ec8bff5e 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc @@ -37,7 +37,9 @@ class FuseMomentumOpPass : public FuseOptimizerOpPass { const std::unordered_map> &vars_set, const std::unordered_map &fused_vars_name, const std::vector &momentum_ops, ir::Graph *graph) const { - PADDLE_ENFORCE_GT(momentum_ops.size(), static_cast(0)); + PADDLE_ENFORCE_GT( + momentum_ops.size(), static_cast(0), + platform::errors::InvalidArgument("Momentum ops must not be empyt.")); // Check attributions // NOTE: If new attribution is added, the following code maybe need change. 
@@ -50,14 +52,32 @@ class FuseMomentumOpPass : public FuseOptimizerOpPass { for (auto &momentum_op : momentum_ops) { PADDLE_ENFORCE_EQ( - mu, BOOST_GET_CONST(float, momentum_op->Op()->GetAttr("mu"))); + mu, BOOST_GET_CONST(float, momentum_op->Op()->GetAttr("mu")), + platform::errors::InvalidArgument( + "All momentum Op's attr(mu) must be same, but there are two " + "different " + "value: %f, %f.", + mu, BOOST_GET_CONST(float, momentum_op->Op()->GetAttr("mu")))); PADDLE_ENFORCE_EQ( use_nesterov, - BOOST_GET_CONST(bool, momentum_op->Op()->GetAttr("use_nesterov"))); + BOOST_GET_CONST(bool, momentum_op->Op()->GetAttr("use_nesterov")), + platform::errors::InvalidArgument( + "All momentum Op's attr(use_nesterov) must be same, but there " + "are two different value: %d, %d.", + use_nesterov, BOOST_GET_CONST(bool, momentum_op->Op()->GetAttr( + "use_nesterov")))); PADDLE_ENFORCE_EQ( op_role, BOOST_GET_CONST(int, momentum_op->Op()->GetAttr( - OpProtoAndCheckerMaker::OpRoleAttrName()))); + OpProtoAndCheckerMaker::OpRoleAttrName())), + platform::errors::InvalidArgument( + "All momentum Op's attr(op_role) must be same, but there are two " + "different " + "value: %d, %d.", + op_role, + BOOST_GET_CONST(int, + momentum_op->Op()->GetAttr( + OpProtoAndCheckerMaker::OpRoleAttrName())))); } // NOTE: fused_var is only exist in scope, so the graph doesn't have diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc index 35bdfde96b..fa86db891f 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc @@ -41,10 +41,12 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const { for (auto &node : topo_nodes) { if (node->Op()->Type() == fuse_op_type) { auto grad_name = node->Op()->Input(kGrad); - PADDLE_ENFORCE_EQ(grad_name.size(), static_cast(1), - "The %s operator has multiple gradient input. Expected " - "it to only have one gradient input.", - fuse_op_type); + PADDLE_ENFORCE_EQ( + grad_name.size(), static_cast(1), + platform::errors::InvalidArgument( + "The %s operator has multiple gradient input. Expected " + "it to only have one gradient input.", + fuse_op_type)); if (IsLoDTensorType(GetTypeOfVar(vars_info, grad_name[0]))) { opt_nodes.emplace_back(node); } @@ -96,7 +98,8 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const { VLOG(6) << var_name << ": " << fused_var_name; PADDLE_ENFORCE_EQ( fused_var_set.count(fused_var_name), 0, - platform::errors::AlreadyExists("The fused variable already exists.")); + platform::errors::AlreadyExists( + "The fused variable(%s) already exists.", fused_var_name)); fused_var_set.insert(fused_var_name); fused_vars_name.emplace(var_name, fused_var_name); } @@ -110,7 +113,10 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const { result.Get(details::kParamsAndDenseGrads); PADDLE_ENFORCE_LE( params_and_dense_grads.size(), aux_var_map.at(kGrad).size(), - "The number of dense gradients should be little than optimizer ops."); + platform::errors::InvalidArgument( + "The number of dense gradients(%d) should be " + "little than optimizer ops(%d).", + params_and_dense_grads.size(), aux_var_map.at(kGrad).size())); std::unordered_set opt_grad_set(aux_var_map.at(kGrad).size()); for (auto &p_g : params_and_dense_grads) { @@ -130,13 +136,14 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const { // some gradient's name maybe changed. 
if (new_grad_idx.size() == 0) { if (!result.Has(details::kFusedGrads)) { - PADDLE_THROW( + PADDLE_THROW(platform::errors::PreconditionNotMet( "The coalesce_grad_tensor_pass should " - "be called before this pass."); + "be called before this pass.")); } auto &fused_grad = result.Get(details::kFusedGrads); PADDLE_ENFORCE_NE(fused_grad.size(), 0, - "The fused gradient should not be empty."); + platform::errors::NotFound( + "The fused gradient should not be empty.")); if (fused_grad.size() > 1) { // Note(chenweihang): Because the dtype of those gradients is not // unified,so the number of fused gradients is more than one, @@ -146,8 +153,9 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const { auto &fused_vars = result.Get(details::kFusedVars); auto iter = std::find(fused_vars.begin(), fused_vars.end(), fused_grad.front()); - PADDLE_ENFORCE_EQ(iter != fused_vars.end(), true, - "Not found the fused gradient variable."); + PADDLE_ENFORCE_EQ( + iter != fused_vars.end(), true, + platform::errors::NotFound("Not found the fused gradient variable.")); fused_vars_name[kGrad] = fused_grad.front(); // Sort the parameters and auxiliary variables according @@ -334,16 +342,24 @@ void FuseOptimizerOpPass::FuseGradientsToContinuousSpace( // The Gradients should not be reused during memory optimization. for (auto &grad_var_name : grads) { auto iter = vars_info.find(grad_var_name); - PADDLE_ENFORCE_EQ(iter != vars_info.end(), true, - "The gradient variable %s is not found.", grad_var_name); - PADDLE_ENFORCE_EQ(!iter->second.empty(), true, - "The gradient var node %s is not found.", grad_var_name); - PADDLE_ENFORCE_NOT_NULL(iter->second.front()->Var(), - "The gradient var node is null."); + PADDLE_ENFORCE_EQ( + iter != vars_info.end(), true, + platform::errors::NotFound("The gradient variable %s is not found.", + grad_var_name)); + PADDLE_ENFORCE_EQ( + !iter->second.empty(), true, + platform::errors::NotFound("The gradient var node %s is not found.", + grad_var_name)); + PADDLE_ENFORCE_NOT_NULL( + iter->second.front()->Var(), + platform::errors::InvalidArgument("The gradient var(%s) node is null.", + grad_var_name)); PADDLE_ENFORCE_EQ( IsLoDTensorType(iter->second.front()->Var()->GetType()), true, - "Currently the gradient type only should be LoDTensor when " - "fusing optimizer ops."); + platform::errors::InvalidArgument( + "Currently the gradient(%s) type only should be LoDTensor when " + "fusing optimizer ops.", + grad_var_name)); for (auto var : iter->second) { pinned_var_set.insert(var->Var()->Name()); } @@ -382,11 +398,14 @@ const VarDesc *FuseOptimizerOpPass::GetVarDescFromVarsInfo( const std::string &var_name) const { auto grad_iter = vars_info.find(var_name); PADDLE_ENFORCE_EQ(grad_iter != vars_info.end(), true, - "The gradient variable %s is not found.", var_name); + platform::errors::NotFound( + "The gradient variable %s is not found.", var_name)); PADDLE_ENFORCE_EQ(!grad_iter->second.empty(), true, - "The gradient var node %s is not found.", var_name); + platform::errors::NotFound( + "The gradient var node %s is not found.", var_name)); PADDLE_ENFORCE_NOT_NULL(grad_iter->second.front()->Var(), - "The gradient var node is null."); + platform::errors::InvalidArgument( + "The gradient var(%s) node is null.", var_name)); return grad_iter->second.front()->Var(); } @@ -428,8 +447,9 @@ void FuseOptimizerOpPass::SortParametersAndAuxVars( const std::vector> ¶ms_grads, std::unordered_map> *aux_var_map, std::vector *ops) const { - PADDLE_ENFORCE_NE(aux_var_map->count(kGrad), static_cast(0), - "The 
gradient variable doesn‘t exist."); + PADDLE_ENFORCE_NE( + aux_var_map->count(kGrad), static_cast(0), + platform::errors::NotFound("The gradient variable doesn‘t exist.")); auto &grad_vec = aux_var_map->at(kGrad); std::vector grad_sort_idx; @@ -437,8 +457,10 @@ void FuseOptimizerOpPass::SortParametersAndAuxVars( for (auto &p_g : params_grads) { auto iter = std::find(grad_vec.begin(), grad_vec.end(), p_g.second); - PADDLE_ENFORCE_EQ(iter != grad_vec.end(), true, - "%s is not found in gradient vector", p_g.second); + PADDLE_ENFORCE_EQ( + iter != grad_vec.end(), true, + platform::errors::NotFound( + "Parameter@Grad(%s) is not found in gradient vector.", p_g.second)); auto idx = std::distance(grad_vec.begin(), iter); grad_sort_idx.emplace_back(idx); } @@ -477,9 +499,10 @@ void FuseOptimizerOpPass::GetFusingVarNamesMap( for (auto &var_n : aux_vars_name) { auto arg_names = node->Op()->Input(var_n); PADDLE_ENFORCE_EQ(arg_names.size(), static_cast(1), - "The input variable of optimizer to be fused is " - "invalid. Excepted %s only has one %s input.", - node->Op()->Type(), var_n); + platform::errors::InvalidArgument( + "The input variable of optimizer to be fused is " + "invalid. Excepted %s only has one %s input.", + node->Op()->Type(), var_n)); (*aux_args_name)[var_n].emplace_back(arg_names[0]); } } @@ -525,10 +548,14 @@ void FuseOptimizerOpPass::InsertInputAndOutputForFusedOpNode( auto deal_with_ctrl_vars = [&out_dep_vars, ¬_useful_vars, &fused_opt_node](ir::Node *ctr_var_node) { PADDLE_ENFORCE_EQ(ctr_var_node->inputs.size(), 1, - "The control var node has nultiple inputs."); + platform::errors::InvalidArgument( + "The control var(%s) node has multiple inputs.", + ctr_var_node->Name())); if (ctr_var_node->inputs.front() == fused_opt_node) { - PADDLE_ENFORCE_GT(ctr_var_node->outputs.size(), 0, - "The control var node has no output."); + PADDLE_ENFORCE_GT( + ctr_var_node->outputs.size(), 0, + platform::errors::InvalidArgument( + "The control var(%s) node has no output.", ctr_var_node->Name())); auto output_ops = ctr_var_node->outputs; output_ops.erase(std::remove_if(output_ops.begin(), output_ops.end(), [&fused_opt_node](const ir::Node *node) { diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc index 1504f00b27..70d4d2b865 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc @@ -35,7 +35,9 @@ class FuseSgdOpPass : public FuseOptimizerOpPass { const std::unordered_map> &vars_set, const std::unordered_map &fused_vars_name, const std::vector &sgd_ops, ir::Graph *graph) const { - PADDLE_ENFORCE_GT(sgd_ops.size(), static_cast(0)); + PADDLE_ENFORCE_GT( + sgd_ops.size(), static_cast(0), + platform::errors::InvalidArgument("SGD ops must not be empyt.")); // NOTE: fused_var is only exist in scope, so the graph doesn't have // fused_var node. 
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc index 6ce1420362..b1afa47910 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc @@ -116,7 +116,10 @@ std::vector BufferSharedCrossOpMemoryReusePass::SortOp( graph_view.BreadthFirstVisit( [&](OpHandleBase *cur_op) { sorted_ops.emplace_back(cur_op); }); PADDLE_ENFORCE_EQ(sorted_ops.size(), graph_view.OpNumber(), - "There are unvisited ops"); + platform::errors::InvalidArgument( + "Sorted ops size(%d) not equal to graph op size(%d). " + "There are unvisited ops.", + sorted_ops.size(), graph_view.OpNumber())); return sorted_ops; } @@ -181,7 +184,9 @@ void BufferSharedCrossOpMemoryReusePass::RunOnScopeIdx(size_t idx) const { auto *out_node = *(out_nodes.begin()); auto *out_var = dynamic_cast(&(out_node->Wrapper())); - PADDLE_ENFORCE_NOT_NULL(out_var); + PADDLE_ENFORCE_NOT_NULL( + out_var, platform::errors::NotFound( + "Can not find a valid Var Node for Var %s.", out_arg)); // If out_arg is not reusable, skip it if (!IsOutVarReusable(*out_var)) { @@ -269,7 +274,8 @@ size_t BufferSharedCrossOpMemoryReusePass::ResolveDependencyBetween( auto op_dep = GetOpDep(prev_op, op); if (op_dep == NodeDependency::kBefore) continue; PADDLE_ENFORCE_EQ(op_dep, NodeDependency::kNoDep, - "The graph has circle, this may be a bug"); + platform::errors::InvalidArgument( + "The graph has circle, this may be a bug.")); auto iter = std::find_if(prev_op->Outputs().begin(), prev_op->Outputs().end(), @@ -316,9 +322,13 @@ size_t BufferSharedCrossOpMemoryReusePass::ResolveDependencyBetween( } void BufferSharedCrossOpMemoryReusePass::BuildOpDependencyMap() const { - PADDLE_ENFORCE(ops_.empty(), "ops_ must be initialized here"); - PADDLE_ENFORCE(op_to_idx_.empty(), "op_to_idx_ must be initialized here"); - PADDLE_ENFORCE(deps_.empty(), "deps_ must be initialized here"); + PADDLE_ENFORCE_EQ(ops_.empty(), true, platform::errors::InvalidArgument( + "Ops must be initialized here.")); + PADDLE_ENFORCE_EQ( + op_to_idx_.empty(), true, + platform::errors::InvalidArgument("Op to idx must be initialized here.")); + PADDLE_ENFORCE_EQ(deps_.empty(), true, platform::errors::InvalidArgument( + "Deps must be initialized here.")); // Toposort ops OpGraphView graph_view(ir::FilterByNodeWrapper(*graph_)); @@ -344,7 +354,10 @@ void BufferSharedCrossOpMemoryReusePass::BuildOpDependencyMap() const { prev_preceding_ops.end()); } }); - PADDLE_ENFORCE_EQ(preceding_ops.size(), op_num); + PADDLE_ENFORCE_EQ(preceding_ops.size(), op_num, + platform::errors::InvalidArgument( + "Preceding ops size(%d) must equal to op num(%d).", + preceding_ops.size(), op_num)); // Find out ComputationOpHandles only ops_.resize(scope_num); @@ -384,28 +397,43 @@ void BufferSharedCrossOpMemoryReusePass::BuildOpDependencyMap() const { size_t BufferSharedCrossOpMemoryReusePass::OpIndex( const ComputationOpHandle *op) const { auto iter = op_to_idx_[op->GetScopeIdx()].find(op); - PADDLE_ENFORCE(iter != op_to_idx_[op->GetScopeIdx()].end()); + PADDLE_ENFORCE_NE(iter, op_to_idx_[op->GetScopeIdx()].end(), + platform::errors::NotFound( + "Can not find op(%s) in op_to_idx_.", op->Name())); return iter->second; } NodeDependency BufferSharedCrossOpMemoryReusePass::GetOpDep( const ComputationOpHandle *op1, const ComputationOpHandle *op2) const { - 
PADDLE_ENFORCE_EQ(op1->GetScopeIdx(), op2->GetScopeIdx()); + PADDLE_ENFORCE_EQ(op1->GetScopeIdx(), op2->GetScopeIdx(), + platform::errors::InvalidArgument( + "Op(%s) and op(%s) must in the same scope.", + op1->Name(), op2->Name())); return deps_[op1->GetScopeIdx()][OpIndex(op1)][OpIndex(op2)]; } void BufferSharedCrossOpMemoryReusePass::SetOpDep( const ComputationOpHandle *op1, const ComputationOpHandle *op2, NodeDependency dep) const { - PADDLE_ENFORCE_EQ(op1->GetScopeIdx(), op2->GetScopeIdx()); + PADDLE_ENFORCE_EQ(op1->GetScopeIdx(), op2->GetScopeIdx(), + platform::errors::InvalidArgument( + "Op(%s) and op(%s) must in the same scope.", + op1->Name(), op2->Name())); if (op1 == op2) { - PADDLE_ENFORCE(dep == NodeDependency::kSame); + PADDLE_ENFORCE_EQ( + dep, NodeDependency::kSame, + platform::errors::InvalidArgument( + "Set Same Op(%s) Dep, dep must be kSame type.", op1->Name())); auto idx = OpIndex(op1); deps_[op1->GetScopeIdx()][idx][idx] = NodeDependency::kSame; } else { auto idx1 = OpIndex(op1); auto idx2 = OpIndex(op2); - PADDLE_ENFORCE(dep != NodeDependency::kSame && idx1 != idx2); + PADDLE_ENFORCE_EQ((dep != NodeDependency::kSame && idx1 != idx2), true, + platform::errors::InvalidArgument( + "Op(%s) and Op(%s) should not have same " + "index(%d), and dep should not kSame type.", + op1->Name(), op2->Name(), idx1)); deps_[op1->GetScopeIdx()][idx1][idx2] = dep; deps_[op1->GetScopeIdx()][idx2][idx1] = ReverseNodeDependency(dep); } diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc index 338a608b4a..0b42f2ebd5 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc @@ -57,7 +57,9 @@ void BufferSharedInplaceOpPass::Run(Graph *graph) const { auto *op = *(pair.second.ops().begin()); const std::string &op_type = op->GetOp()->Type(); const framework::OpDesc *op_desc = op->Node()->Op(); - PADDLE_ENFORCE_NOT_NULL(op_desc); + PADDLE_ENFORCE_NOT_NULL( + op_desc, platform::errors::NotFound("Op(%s) can not find opdesc.", + op->Name())); auto &infer_inplace = OpInfoMap::Instance().Get(op_type).infer_inplace_; if (!infer_inplace) { diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc index 9a322bdc1d..7b9b5aa623 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc @@ -58,8 +58,12 @@ static int64_t GetMemorySize( &vars, const std::string &var_name) { auto *var_desc = TryGetLatestVarDesc(vars.at(var_name)); - PADDLE_ENFORCE_NOT_NULL(var_desc); - PADDLE_ENFORCE(IsLoDTensor(var_desc)); + PADDLE_ENFORCE_NOT_NULL( + var_desc, + platform::errors::NotFound("Var(%s) can not find VarDesc.", var_name)); + PADDLE_ENFORCE_EQ(IsLoDTensor(var_desc), true, + platform::errors::InvalidArgument( + "Var(%s) must be LoDTensor.", var_name)); auto dims = var_desc->GetShape(); return SizeOfType(var_desc->GetDataType()) * std::accumulate(dims.begin(), dims.end(), static_cast(1), diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h b/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h index 4f6bacecab..9484248544 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h +++ 
b/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h @@ -42,8 +42,10 @@ class MemOptVarInfo { } void SetRefCnt(size_t ref_cnt) { - PADDLE_ENFORCE_GE(ref_cnt, 1, - "Reference count must be larger than or equal to 1"); + PADDLE_ENFORCE_GE( + ref_cnt, 1, + platform::errors::InvalidArgument( + "Reference count(%d) must be larger than or equal to 1.", ref_cnt)); ref_cnt_ = ref_cnt; runtime_ref_cnt_ = ref_cnt; } diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc index 20c7968d6a..221b0a76e7 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc @@ -66,7 +66,11 @@ bool MemoryReusePass::TryReuseVar(details::VarHandle *in_var, details::VarHandle *out_var) const { auto *op = dynamic_cast(out_var->GeneratedOp()); - PADDLE_ENFORCE_NOT_NULL(op); + PADDLE_ENFORCE_NOT_NULL( + op, + platform::errors::InvalidArgument( + "Var(%s) have no GeneratedOp, or it's op is not ComputationOpHandle.", + out_var->Name())); if (IsVarPairReusable(*in_var, *out_var)) { AddReuseVar(op, in_var, out_var); return true; @@ -91,10 +95,13 @@ VarDesc *MemoryReusePass::GetVarDesc(const details::VarHandle &var) const { size_t scope_idx = var.scope_idx(); auto iter = var_descs_[scope_idx].find(var_name); if (iter == var_descs_[scope_idx].end()) { - PADDLE_ENFORCE((*all_vars_)[scope_idx].count(var_name), - "Variable %s not found", var_name); + PADDLE_ENFORCE_NE( + (*all_vars_)[scope_idx].count(var_name), 0, + platform::errors::NotFound("Variable %s not found.", var_name)); auto *desc = TryGetLatestVarDesc((*all_vars_)[scope_idx].at(var_name)); - PADDLE_ENFORCE_NOT_NULL(desc); + PADDLE_ENFORCE_NOT_NULL( + desc, + platform::errors::NotFound("Var(%s) can not find VarDesc.", var_name)); var_descs_[scope_idx].emplace(var_name, desc); return desc; } else { @@ -119,7 +126,9 @@ void MemoryReusePass::CollectShareTensorBufferOpHandles() const { if (share_buffer_op != nullptr) { auto *compute_op = details::GetUniquePendingComputationOpHandle(share_buffer_op); - PADDLE_ENFORCE(ops_.count(compute_op) == 0); + PADDLE_ENFORCE_EQ( + ops_.count(compute_op), 0, + platform::errors::AlreadyExists("Compute op already exists.")); ops_.emplace(compute_op, share_buffer_op); } } @@ -227,8 +236,11 @@ bool MemoryReusePass::IsInVarReusable(const details::VarHandle &in_var) const { */ bool MemoryReusePass::IsOutVarReusable( const details::VarHandle &out_var) const { - PADDLE_ENFORCE_NOT_NULL(dynamic_cast( - out_var.GeneratedOp())); + PADDLE_ENFORCE_NOT_NULL( + dynamic_cast(out_var.GeneratedOp()), + platform::errors::InvalidArgument( + "Var(%s) have no GeneratedOp, or it's op is not ComputationOpHandle.", + out_var.Name())); const auto out_name = out_var.Name(); if (out_name == kEmptyVarName) { return false; @@ -236,9 +248,10 @@ bool MemoryReusePass::IsOutVarReusable( // out_var must be the first version!!! 
auto out_var_iter = (*all_vars_)[out_var.scope_idx()].find(out_name); - PADDLE_ENFORCE(out_var_iter != (*all_vars_)[out_var.scope_idx()].end() && - !out_var_iter->second.empty(), - "Cannot find variable %s", out_name); + PADDLE_ENFORCE_EQ( + (out_var_iter != (*all_vars_)[out_var.scope_idx()].end() && + !out_var_iter->second.empty()), + true, platform::errors::NotFound("Cannot find variable %s.", out_name)); if (out_var_iter->second[0] != &out_var) { return false; @@ -282,7 +295,11 @@ bool MemoryReusePass::IsVarPairReusable( const details::VarHandle &in_var, const details::VarHandle &out_var) const { auto *op = dynamic_cast(out_var.GeneratedOp()); - PADDLE_ENFORCE_NOT_NULL(op); + PADDLE_ENFORCE_NOT_NULL( + op, + platform::errors::InvalidArgument( + "Var(%s) have no GeneratedOp, or it's op is not ComputationOpHandle.", + out_var.Name())); const auto in_name = in_var.Name(); if (in_name == out_var.Name()) { @@ -308,8 +325,10 @@ bool MemoryReusePass::IsVarPairReusable( void MemoryReusePass::AddReuseVar(details::ComputationOpHandle *op, details::VarHandle *in_var, details::VarHandle *out_var) const { - PADDLE_ENFORCE((*var_infos_)[op->GetScopeIdx()].count(in_var->Name()) > 0, - "%s does not in mem-opt var infos", in_var->Name()); + PADDLE_ENFORCE_GT( + (*var_infos_)[op->GetScopeIdx()].count(in_var->Name()), 0, + platform::errors::NotFound("Var(%s) does not in mem opt var infos.", + in_var->Name())); if (ops_.count(op) == 0) { InsertShareTensorBufferOpHandleToGraph(op); @@ -349,7 +368,10 @@ void MemoryReusePass::UpdateLastLiveOpOfVar(details::ComputationOpHandle *op, if (out_var_op_iter == (*last_live_ops_of_vars_)[scope_idx].end()) { last_live_op_of_in_var = op; } else { - PADDLE_ENFORCE(!out_var_op_iter->second.ops().empty()); + PADDLE_ENFORCE_EQ( + out_var_op_iter->second.ops().empty(), false, + platform::errors::InvalidArgument( + "Var(%s)'s last live op should not empty.", out_var->Name())); last_live_op_of_in_var = *(out_var_op_iter->second.ops().begin()); } @@ -359,8 +381,9 @@ void MemoryReusePass::UpdateLastLiveOpOfVar(details::ComputationOpHandle *op, last_live_ops_of_in_var->insert(last_live_op_of_in_var); auto in_var_info_iter = (*var_infos_)[scope_idx].find(in_var->Name()); - PADDLE_ENFORCE(in_var_info_iter != (*var_infos_)[scope_idx].end(), - "Cannot find variable %s", in_var->Name()); + PADDLE_ENFORCE_NE( + in_var_info_iter, (*var_infos_)[scope_idx].end(), + platform::errors::NotFound("Cannot find variable %s.", in_var->Name())); in_var_info_iter->second->SetRefCnt(1); } diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc index d2cc89a2b4..11c2508afb 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc @@ -39,7 +39,7 @@ void OpGraphView::Build(const std::vector &ops) { } PADDLE_ENFORCE( preceding_ops_.size() == ops.size() && pending_ops_.size() == ops.size(), - "There are duplicate ops in graph."); + platform::errors::InvalidArgument("There are duplicate ops in graph.")); } std::unordered_set OpGraphView::AllOps() const { @@ -56,8 +56,10 @@ bool OpGraphView::HasOp(details::OpHandleBase *op) const { } void OpGraphView::EnforceHasOp(details::OpHandleBase *op) const { - PADDLE_ENFORCE(HasOp(op), "Cannot find op %s in OpGraphView", - op == nullptr ? "nullptr" : op->DebugString()); + PADDLE_ENFORCE_EQ(HasOp(op), true, + platform::errors::NotFound( + "Cannot find op %s in OpGraphView.", + op == nullptr ? 
"nullptr" : op->DebugString())); } const std::unordered_set &OpGraphView::PendingOps( diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h index 86b25c1395..5fb2caedba 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h +++ b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h @@ -127,9 +127,13 @@ void OpGraphView::BreadthFirstVisit(Callback &&callback) const { } } - PADDLE_ENFORCE_EQ(num_calls, op_num, "There are unvisited ops"); - PADDLE_ENFORCE_EQ(visited_ops.size(), op_num, "There are unvisited ops"); - PADDLE_ENFORCE(op_deps.empty(), "There are unvisited ops"); + PADDLE_ENFORCE_EQ(num_calls, op_num, platform::errors::InvalidArgument( + "There are unvisited ops.")); + PADDLE_ENFORCE_EQ( + visited_ops.size(), op_num, + platform::errors::InvalidArgument("There are unvisited ops.")); + PADDLE_ENFORCE_EQ(op_deps.empty(), true, platform::errors::InvalidArgument( + "There are unvisited ops.")); } } // namespace ir diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass.cc index 4584b3d4e0..88d1b2aa00 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass.cc @@ -77,11 +77,15 @@ class ShrinkDepsOpFunctor { const std::vector &ops) const { std::unordered_map op_to_idx; for (size_t i = 0; i < ops.size(); ++i) { - PADDLE_ENFORCE(graph_.HasOp(ops[i]), "Op does not exist in graph"); + PADDLE_ENFORCE_EQ( + graph_.HasOp(ops[i]), true, + platform::errors::InvalidArgument("Op does not exist in graph.")); op_to_idx[ops[i]] = i; } - PADDLE_ENFORCE(op_to_idx.size() == ops.size(), "Duplicate ops"); + PADDLE_ENFORCE_EQ( + op_to_idx.size(), ops.size(), + platform::errors::InvalidArgument("Graph may have duplicate ops.")); std::vector> ret(ops.size()); for (auto &e : ret) { @@ -247,9 +251,9 @@ ExtractComputationOpFromLastLivedVar(details::VarHandle *var, size_t scope_idx, return {}; } - PADDLE_ENFORCE_EQ( - computation_ops.empty(), false, - platform::errors::InvalidArgument("Computation ops should not be empty")); + PADDLE_ENFORCE_EQ(computation_ops.empty(), false, + platform::errors::InvalidArgument( + "Computation ops should not be empty.")); // stage four. Try to shrink computation op if they depend on each other. // Get the smallest set of the most ops. 
@@ -263,8 +267,9 @@ void ReferenceCountPass::ApplyImpl(ir::Graph *graph) const { Get>(kLastLiveOpsOfVars); PADDLE_ENFORCE(last_live_ops_of_vars.empty() && var_infos.empty(), - "Last Live Ops and Reference Counts of vars should be " - "initialized at here."); + platform::errors::InvalidArgument( + "Last live ops and reference counts of vars should be " + "initialized at here.")); const auto &vars = graph->Get(details::kGraphVars); @@ -304,11 +309,15 @@ void ReferenceCountPass::ApplyImpl(ir::Graph *graph) const { auto &var_name = name_var_pair.first; auto &var_handles = name_var_pair.second; - PADDLE_ENFORCE_EQ(var_desc->Name(), var_name); - PADDLE_ENFORCE_EQ( - var_handles.empty(), false, - platform::errors::InvalidArgument("Variable %s not found", var_name)); + var_desc->Name(), var_name, + platform::errors::InvalidArgument( + "A Var, it's VarName(%s) and DescName(%s) not same.", var_name, + var_desc->Name())); + + PADDLE_ENFORCE_EQ(var_handles.empty(), false, + platform::errors::InvalidArgument( + "Variable %s not found.", var_name)); auto last_ver_var = var_handles.back(); if (last_ver_var->Node()->IsCtrlVar()) { @@ -327,12 +336,13 @@ void ReferenceCountPass::ApplyImpl(ir::Graph *graph) const { continue; } + PADDLE_ENFORCE_EQ(status, LastLiveOpSearchStatus::kSuccess, + platform::errors::InvalidArgument( + "Status(%d) must be success.", status)); PADDLE_ENFORCE_EQ( - status, LastLiveOpSearchStatus::kSuccess, - platform::errors::InvalidArgument("status must be success")); - PADDLE_ENFORCE_EQ(result.empty(), false, - platform::errors::NotFound( - "Last living ops of %s cannot be empty", var_name)); + result.empty(), false, + platform::errors::NotFound("Last living ops of %s cannot be empty.", + var_name)); std::string last_live_ops_log_str; for (auto &each_ret : result) { diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/all_reduce_deps_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/all_reduce_deps_pass.cc index 8923dfc323..6d5e4ac27b 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/all_reduce_deps_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/all_reduce_deps_pass.cc @@ -45,7 +45,9 @@ class AllReduceDepsPass : public ir::Pass { for (size_t i = 0; i < all_reduce_op_handles.size(); ++i) { auto op_handle = dynamic_cast(all_reduce_op_handles[i]); - PADDLE_ENFORCE(op_handle, "op_handle must be NCCLOpHandleBase"); + PADDLE_ENFORCE_NOT_NULL(op_handle, + platform::errors::InvalidArgument( + "Op handle must be NCCLOpHandleBase.")); op_handle->SetRunEnv(i, use_hierarchical_allreduce); } #endif @@ -95,7 +97,9 @@ class AllReduceDepsPass : public ir::Pass { } } - PADDLE_ENFORCE_NE(next_ready_ops.size(), 0, "There maybe have a cycle."); + PADDLE_ENFORCE_NE( + next_ready_ops.size(), 0, + platform::errors::InvalidArgument("There may be a cycle.")); ready_ops.clear(); std::swap(ready_ops, next_ready_ops); GetSortedAllReduceOps(ready_ops, &all_reduce_op_handles); @@ -122,18 +126,25 @@ class AllReduceDepsPass : public ir::Pass { // NOTE(zcd): For distributed training, it is important to keep the order of // allReduce on each node consistent. Otherwise, hang may occur. // Sort the current_all_reduce_op_handles according to the name of input. 
- sort(current_all_reduce_op_handles.begin(), - current_all_reduce_op_handles.end(), - [](const details::OpHandleBase* left, - const details::OpHandleBase* right) -> bool { - auto left_in_vars = - details::DynamicCast(left->Inputs()); - auto right_in_vars = - details::DynamicCast(right->Inputs()); - PADDLE_ENFORCE_GT(left_in_vars.size(), 0); - PADDLE_ENFORCE_GT(right_in_vars.size(), 0); - return left_in_vars[0]->Name() > right_in_vars[0]->Name(); - }); + sort( + current_all_reduce_op_handles.begin(), + current_all_reduce_op_handles.end(), + [](const details::OpHandleBase* left, + const details::OpHandleBase* right) -> bool { + auto left_in_vars = + details::DynamicCast(left->Inputs()); + auto right_in_vars = + details::DynamicCast(right->Inputs()); + PADDLE_ENFORCE_GT(left_in_vars.size(), 0, + platform::errors::InvalidArgument( + "OpHandle(%s) inputs size must greater than 0.", + left->Name())); + PADDLE_ENFORCE_GT(right_in_vars.size(), 0, + platform::errors::InvalidArgument( + "OpHandle(%s) inputs size must greater than 0.", + right->Name())); + return left_in_vars[0]->Name() > right_in_vars[0]->Name(); + }); all_reduce_op_handles->insert(all_reduce_op_handles->end(), current_all_reduce_op_handles.begin(), @@ -170,7 +181,10 @@ class AllReduceDepsPass : public ir::Pass { break; } } - PADDLE_ENFORCE(find_valid_input, "Doesn't find valid input."); + PADDLE_ENFORCE_EQ( + find_valid_input, true, + platform::errors::NotFound( + "In OpHandle(%s) Doesn't find valid input.", op->Name())); } VLOG(10) << out2.str(); if (grads_of_stale_program != all_reduce_op_handles.size()) { diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/backward_optimizer_op_deps_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/backward_optimizer_op_deps_pass.cc index 782c51a032..2aae14fa33 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/backward_optimizer_op_deps_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/backward_optimizer_op_deps_pass.cc @@ -179,9 +179,10 @@ class BackWardOpDepsPass : public ir::Pass { // Currently, we assume that once gradient is generated, it can be // broadcast, and each gradient is only broadcast once. auto backward_vars = details::GetOpRoleVarsOrEmpty(op_desc); - PADDLE_ENFORCE_EQ(node->IsWrappedBy(), true, - platform::errors::InvalidArgument( - "Node must be wrapped by OpHandleBase")); + PADDLE_ENFORCE_EQ( + node->IsWrappedBy(), true, + platform::errors::InvalidArgument( + "Node(%s) must be wrapped by OpHandleBase.", node->Name())); backward_op_handles->emplace_back(&node->Wrapper()); diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc index 86fbbaf772..81c98ecf0c 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc @@ -64,9 +64,10 @@ class FuseAllReduceOpPass : public ir::Pass { PADDLE_ENFORCE_EQ( all_reduce_ops.size(), grads.size(), platform::errors::Unimplemented( - "The number of all_reduce OpHandle is not equal to the " - "number of grads. Maybe some gradients are sparse type, " - "it is not supported currently.")); + "The number of all_reduce OpHandle(%d) is not equal to the " + "number of grads(%d). 
Maybe some gradients are sparse type, " + "it is not supported currently.", + all_reduce_ops.size(), grads.size())); auto &group_params_grads = graph->Get( details::kGroupParamsAndDenseGrads); @@ -79,7 +80,10 @@ class FuseAllReduceOpPass : public ir::Pass { for (auto &group_p_g : group_params_grads) { size_t group_size = group_p_g.size(); - PADDLE_ENFORCE_GT(group_size, static_cast(0)); + PADDLE_ENFORCE_GT( + group_size, static_cast(0), + platform::errors::InvalidArgument( + "Parameter and Parameter@grad in one group, must not be empty.")); std::vector group_all_reduce_ops; group_all_reduce_ops.reserve(group_size); for (auto &p_g : group_p_g) { @@ -103,26 +107,40 @@ class FuseAllReduceOpPass : public ir::Pass { all_reduce_ops.reserve(grads.size()); for (auto &node : result.Nodes()) { if (node->IsOp()) { - PADDLE_ENFORCE(node->IsWrappedBy()); + PADDLE_ENFORCE_EQ( + node->IsWrappedBy(), true, + platform::errors::InvalidArgument( + "Op Node(%s) should Wrapped by OpHandleBase.", node->Name())); auto *all_reduce_op_handle = dynamic_cast( &node->Wrapper()); if (all_reduce_op_handle) { #if defined(PADDLE_WITH_DGC) PADDLE_ENFORCE_NE( all_reduce_op_handle->Name(), "sparse_all_reduce", - "DGC doesn't support fuse for now, if you want to use DGC " - "you need set strategy.fuse_all_reduce_ops = False."); + platform::errors::InvalidArgument( + "DGC doesn't support fuse for now, if you want to use DGC " + "you need set strategy.fuse_all_reduce_ops = False.")); #endif auto inputs = details::DynamicCast( all_reduce_op_handle->Inputs()); - PADDLE_ENFORCE_EQ(inputs.size(), num_place); + PADDLE_ENFORCE_EQ(inputs.size(), num_place, + platform::errors::InvalidArgument( + "The input size(%d) of all reduce op must " + "equal to place cnt(%d)!", + inputs.size(), num_place)); // The inputs' name should be the same. 
auto &grad_name = inputs[0]->name(); for (size_t i = 1; i < inputs.size(); ++i) { - PADDLE_ENFORCE_EQ(inputs[i]->name(), grad_name, - "The input name should be the same."); + PADDLE_ENFORCE_EQ( + inputs[i]->name(), grad_name, + platform::errors::InvalidArgument( + "The input name should be the same.diff name: %s %s.", + inputs[i]->name(), grad_name)); } - PADDLE_ENFORCE_NE(grads.count(grad_name), static_cast(0)); + PADDLE_ENFORCE_NE( + grads.count(grad_name), static_cast(0), + platform::errors::InvalidArgument( + "Parameter@grad(%s) must in grad set.", grad_name)); all_reduce_ops.emplace(grad_name, node); } } diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_check_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_check_pass.cc index 8cc33a6ceb..73f8cd67ee 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_check_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_check_pass.cc @@ -24,7 +24,10 @@ namespace ir { class SSAGraghBuilderWithChecker : public ir::Pass { protected: void ApplyImpl(ir::Graph *graph) const override { - PADDLE_ENFORCE(IsValidGraph(graph)); + PADDLE_ENFORCE_EQ( + IsValidGraph(graph), true, + platform::errors::InvalidArgument( + "In SSAGraghBuilderWithChecker, invalid Graph input.")); } bool IsValidGraph(const ir::Graph *graph) const { diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc index 4fbd8a878a..fd82d6b10e 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc @@ -163,7 +163,13 @@ void MultiDevSSAGraphBuilderBase::Init() const { nccl_ctxs_ = multi_nccl_ctxs_->DefaultFlatCtx(); } #endif - PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size()); + PADDLE_ENFORCE_EQ( + places_.size(), local_scopes_.size(), + platform::errors::InvalidArgument( + "Places size and LocalScopes not equal " + "Places size(%d), LocalScopes size(%d) " + "If use multi devices, Places size must equas to LocalScopes size.", + places_.size(), local_scopes_.size())); } void MultiDevSSAGraphBuilderBase::ApplyImpl(ir::Graph *graph) const { @@ -500,7 +506,11 @@ void MultiDevSSAGraphBuilderBase::CreateAllReduceOp(ir::Graph *result, SetCommunicationContext(op_handle, places_[i]); auto &vars = result->Get(details::kGraphVars)[i][og]; - PADDLE_ENFORCE(!vars.empty()); + PADDLE_ENFORCE_EQ(vars.empty(), false, + platform::errors::InvalidArgument( + "Can not find Var(%s) in Place[%d] " + "Paddle Can not add AllReduce OP for Var(%s).", + og, i, og)); auto &prev_grad = vars.back(); op_handle->AddInput(prev_grad); VLOG(10) << "all_reduce_op_handle add input " << prev_grad->DebugString(); @@ -566,7 +576,11 @@ details::VarHandle *MultiDevSSAGraphBuilderBase::CreateReduceOp( auto &p = places_[i]; SetCommunicationContext(op_handle, p); auto &vars = result->Get(details::kGraphVars)[i][og]; - PADDLE_ENFORCE(!vars.empty()); + PADDLE_ENFORCE_EQ(vars.empty(), false, + platform::errors::InvalidArgument( + "Can not find Var(%s) in Place[%d] " + "Paddle Can not add Reduce OP for Var(%s).", + og, i, og)); auto &prev_grad = vars.back(); op_handle->AddInput(prev_grad); } @@ -590,7 +604,11 @@ bool MultiDevSSAGraphBuilderBase::IsScaleLossOp(ir::Node *node) const { bool MultiDevSSAGraphBuilderBase::IsSparseGradient( const std::string &og) const { - 
PADDLE_ENFORCE(all_vars_.count(og) != 0); + PADDLE_ENFORCE_NE(all_vars_.count(og), 0, + platform::errors::InvalidArgument( + "Can not find Var(%s) in VarDescs " + "Paddle Can not add Collective OP for Var(%s).", + og, og)); return all_vars_.at(og)->GetType() == proto::VarType::SELECTED_ROWS; } @@ -641,10 +659,20 @@ int BalanceVarSSAGraphBuilder::GetOpDeviceID(ir::Node *node) const { std::vector, node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName())); - PADDLE_ENFORCE_EQ(param_grad.size(), 2U); + PADDLE_ENFORCE_EQ( + param_grad.size(), 2U, + platform::errors::InvalidArgument( + "In Node %s, the size of attribute %s must be 2, include Parameter " + "and Parameter@Grad.", + node->Name(), OpProtoAndCheckerMaker::OpRoleVarAttrName())); int dev_id = GetVarDeviceID(param_grad[1]); - PADDLE_ENFORCE_NE(dev_id, -1, "dev_id should not be -1.[%s, %s, %s]", - node->Op()->Type(), param_grad[0], param_grad[1]); + PADDLE_ENFORCE_NE(dev_id, -1, platform::errors::NotFound( + "Can not find Device ID, for NodeName:%s, " + "NodeType:%s, Param:%s, Param@Grad:%s" + "For this fault, you can consult the " + "Paddle technical personnel for answer ", + node->Name(), node->Op()->Type(), + param_grad[0], param_grad[1])); return dev_id; } @@ -654,10 +682,16 @@ size_t BalanceVarSSAGraphBuilder::GetAppropriateDeviceID( for (auto var_name : var_names) { if (all_vars_.find(var_name) == all_vars_.end()) continue; auto var_desc = all_vars_.at(var_name); - PADDLE_ENFORCE_NOT_NULL(var_desc); + PADDLE_ENFORCE_NOT_NULL(var_desc, + platform::errors::NotFound( + "Can not find Var(%s) in Var Desc.", var_name)); auto dim = framework::make_ddim(var_desc->GetShape()); int64_t numel = framework::product(dim); - PADDLE_ENFORCE_GT(numel, 0); + PADDLE_ENFORCE_GT(numel, 0, + platform::errors::InvalidArgument( + "The numel of Var(%s) must greater than 0" + "Please check your code,about Var(%s) Shape.", + var_name, var_name)); numel_sum += numel; } @@ -736,7 +770,12 @@ int ReduceSSAGraphBuilder::GetOpDeviceID( std::vector, node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName())); - PADDLE_ENFORCE_EQ(param_grad.size(), 2U); + PADDLE_ENFORCE_EQ( + param_grad.size(), 2U, + platform::errors::InvalidArgument( + "In Node %s, The size of attribute %s must be 2, include Parameter " + "and Parameter@Grad.", + node->Name(), OpProtoAndCheckerMaker::OpRoleVarAttrName())); int dev_id = GetVarDeviceID(param_grad[1]); if (dev_id == -1) { @@ -798,7 +837,12 @@ std::vector ReduceSSAGraphBuilder::SortForReduceMode( } } - PADDLE_ENFORCE_EQ(sorted_ops.size(), topo_ops.size()); + PADDLE_ENFORCE_EQ(sorted_ops.size(), topo_ops.size(), + platform::errors::InvalidArgument( + "Sorted ops calc error!" + "The result for sorted ops size(%d) must be " + "equal to topo ops size(%d).", + sorted_ops.size(), topo_ops.size())); ResetState(); return sorted_ops; @@ -820,14 +864,23 @@ bool DistSSAGraphBuilder::DealWithSpecialOp(ir::Graph *result, bool insert_op = false; if (OpHaveRole(*node, OpRole::kRPC)) { int op_dev_id = CreateRPCOp(result, node); - PADDLE_ENFORCE(op_dev_id != -1, - "Can not schedule the RPC operator to the right place."); + PADDLE_ENFORCE_NE(op_dev_id, -1, platform::errors::InvalidArgument( + "Can not schedule the RPC operator to " + "the right place. 
NodeName:%s.", + node->Name())); if (node->Op()->Type() == "recv") { auto recv_vars_attr = BOOST_GET_CONST(std::vector, node->Op()->GetNullableAttr( OpProtoAndCheckerMaker::OpRoleVarAttrName())); - PADDLE_ENFORCE(recv_vars_attr.size() == 2UL); // [parameter, gradient] + PADDLE_ENFORCE_EQ( + recv_vars_attr.size(), 2UL, + platform::errors::InvalidArgument( + "In Node %s, the size of attribute %s must be 2, include " + "Parameter and Parameter@Grad.", + node->Name(), + OpProtoAndCheckerMaker::OpRoleVarAttrName())); // [parameter, + // gradient] if (recv_vars_attr[0].find(".block") == std::string::npos) { bcast_var_name_set_[op_dev_id].emplace(recv_vars_attr[0]); } @@ -879,8 +932,9 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const { if (node->Op()->Type() == "send") { // TODO(paddle-dev): getting the first var is not safe. op_dev_id = GetVarDeviceID(node->inputs[0]->Name()); - PADDLE_ENFORCE(!ir::IsControlDepVar(*node->inputs[0]), - "This hack no longer holds, please fix."); + PADDLE_ENFORCE_EQ(ir::IsControlDepVar(*node->inputs[0]), false, + platform::errors::InvalidArgument( + "This hack no longer holds, please fix.")); // the variable name which contains .block means it was split by // split_byref op if (strategy_.reduce_ == @@ -893,7 +947,12 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const { auto send_param_grad = BOOST_GET_CONST( std::vector, node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName())); - PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U); + PADDLE_ENFORCE_EQ( + send_param_grad.size(), 2U, + platform::errors::InvalidArgument( + "In Node %s, the size of attribute %s must be 2, include " + "Parameter and Parameter@Grad.", + node->Name(), OpProtoAndCheckerMaker::OpRoleVarAttrName())); op_dev_id = GetAppropriateDeviceID({send_param_grad[1]}); VLOG(10) << "send grad " << input_var_names[0] << " origin " << send_param_grad[1] << " place: " << op_dev_id; @@ -926,9 +985,10 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const { op_dev_id = 0; } - PADDLE_ENFORCE(op_dev_id != -1, "can not find the right place for rpc op: %s", - node->Op()->Type()); - + PADDLE_ENFORCE_NE( + op_dev_id, -1, + platform::errors::NotFound("Can not find the right place for rpc op: %s.", + node->Op()->Type())); // Create fetch_barrier op handle to enable output on all devices. // **NOTE** fetch_barrier should output variables list same as recv op does. 
if (node->Op()->Type() == "fetch_barrier") { @@ -956,7 +1016,10 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const { int outvar_dev_id = op_dev_id; if (node->Op()->Type() == "fetch_barrier") { outvar_dev_id = GetVarDeviceID(output->Name()); - PADDLE_ENFORCE_NE(outvar_dev_id, -1, "output name %s", output->Name()); + PADDLE_ENFORCE_NE(outvar_dev_id, -1, + platform::errors::NotFound( + "Can not find the right place for the var: %s.", + output->Name())); } p = places_[outvar_dev_id]; ir::Node *new_node = nullptr; @@ -1007,13 +1070,14 @@ int DistSSAGraphBuilder::CreateDistTrainOp(ir::Graph *result, } else { LOG(ERROR) << "got unexpected dist op: " << node->Op()->Type(); PADDLE_THROW( - "the distribute training related op should be in [split_byref, " - "concat]."); + platform::errors::Unimplemented("The distribute training related op " + "should be in [split_byref, concat].")); } - PADDLE_ENFORCE(op_dev_id != -1, - "can not find right place for distributed op: %s", - node->Op()->Type()); + PADDLE_ENFORCE_NE(op_dev_id, -1, + platform::errors::NotFound( + "Can not find right place for distributed op: %s.", + node->Op()->Type())); CreateComputationalOp(result, node, op_dev_id); return op_dev_id; diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_print_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_print_pass.cc index efd549e79d..a080b4bc33 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_print_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_print_pass.cc @@ -28,7 +28,10 @@ class SSAGraghBuilderWithPrinterPass : public ir::Pass { void ApplyImpl(ir::Graph *graph) const override { std::unique_ptr fout( new std::ofstream(Get(kGraphvizPath))); - PADDLE_ENFORCE(fout->good()); + PADDLE_ENFORCE_EQ( + fout->good(), true, + platform::errors::Unavailable("Open file fail! 
kGraphvizPath = %s.", + Get(kGraphvizPath))); if (Has("graph_printer")) { Get("graph_printer").Print(*graph, *fout); } else { diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/sequential_execution_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/sequential_execution_pass.cc index 7de3b7c605..bcbd1e066c 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/sequential_execution_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/sequential_execution_pass.cc @@ -54,11 +54,16 @@ class SequentialExecutionPass : public ir::Pass { if (!node->IsOp()) continue; std::unordered_set preceding_ops; for (auto *in : node->inputs) { - PADDLE_ENFORCE(in->IsVar(), - "Preceding Node of Op Nodes must be Var Node"); + PADDLE_ENFORCE_EQ( + in->IsVar(), true, + platform::errors::InvalidArgument( + "Preceding Node(%s) of Op Nodes must be Var Node.", + in->Name())); if (in->inputs.empty()) continue; - PADDLE_ENFORCE(in->inputs.size() == 1 && in->inputs[0]->IsOp(), - "Preceding Op Node of Var Node must be unique"); + PADDLE_ENFORCE_EQ((in->inputs.size() == 1 && in->inputs[0]->IsOp()), + true, + platform::errors::InvalidArgument( + "Preceding Op Node of Var Node must be unique.")); preceding_ops.insert(in->inputs[0]); pending_ops[in->inputs[0]].insert(node); } @@ -72,15 +77,18 @@ class SequentialExecutionPass : public ir::Pass { ir::Node *found_node = nullptr; for (auto *node : ready_ops) { if (IsSameOpDesc(op_desc, node->Op())) { - PADDLE_ENFORCE(found_node == nullptr, - "Found multiple op_desc in graph: %s", - op_desc->Type()); + PADDLE_ENFORCE_EQ( + found_node, nullptr, + platform::errors::InvalidArgument( + "Found multiple op_desc in graph: %s.", op_desc->Type())); found_node = node; } } - PADDLE_ENFORCE_NOT_NULL(found_node, "Cannot find op_desc in graph: %s", - op_desc->Type()); + PADDLE_ENFORCE_NOT_NULL( + found_node, + platform::errors::NotFound("Cannot find op_desc in graph: %s.", + op_desc->Type())); for (auto *pending_op : pending_ops[found_node]) { if (--op_deps.at(pending_op) == 0) { ready_ops.insert(pending_op); -- GitLab
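
The recurring change in this patch is mechanical: a bare PADDLE_ENFORCE(cond, "msg") becomes a typed comparison macro (PADDLE_ENFORCE_EQ / _NE / _GT / _NOT_NULL) with an explicit category from platform::errors and a message that interpolates the offending values. The standalone C++ sketch below imitates that pattern so it can be compiled and run outside the Paddle tree; ENFORCE_EQ and the InvalidArgument struct here are hypothetical stand-ins, not Paddle's real PADDLE_ENFORCE_EQ or platform::errors::InvalidArgument.

// Minimal, compilable sketch of the "typed enforce with contextual message"
// pattern applied throughout this patch. ENFORCE_EQ and InvalidArgument are
// hypothetical stand-ins for Paddle's PADDLE_ENFORCE_EQ and
// platform::errors::InvalidArgument.
#include <cstdio>
#include <stdexcept>
#include <string>

struct InvalidArgument {
  std::string msg;
  template <typename... Args>
  explicit InvalidArgument(const char* fmt, Args... args) {
    char buf[512];
    std::snprintf(buf, sizeof(buf), fmt, args...);
    msg = buf;
  }
};

// Analogue of PADDLE_ENFORCE_EQ(a, b, error): on mismatch it raises an error
// carrying the formatted, value-bearing message instead of a bare assertion.
#define ENFORCE_EQ(a, b, error)                               \
  do {                                                        \
    if (!((a) == (b))) throw std::runtime_error((error).msg); \
  } while (0)

int main() {
  size_t sorted_ops = 3, graph_ops = 4;
  try {
    // Old style (removed by the patch) was roughly:
    //   PADDLE_ENFORCE(sorted_ops == graph_ops, "There are unvisited ops");
    // New style: typed comparison + error category + both values in the text.
    ENFORCE_EQ(sorted_ops, graph_ops,
               InvalidArgument("Sorted ops size(%zu) not equal to graph op "
                               "size(%zu). There are unvisited ops.",
                               sorted_ops, graph_ops));
  } catch (const std::runtime_error& e) {
    std::printf("enforce failed: %s\n", e.what());
  }
  return 0;
}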