Unverified commit fce64662, authored by: W wanghuancoder, committed by: GitHub

fix some errmsg report, in framework/ir/ subdir(memory,optimizer,multi_device) (#25460)

* fix paddle/fluid/framework/ir/multi_devices_graph_pass/ error msg report, test=develop

* fix paddle/fluid/framework/ir/memory_optimize_pass/ error msg report, test=develop

* fix paddle/fluid/framework/ir/fuse_optimizer_ops_pass/ error msg report, test=develop

* fix paddle/fluid/framework/ir/memory_optimize_pass/ error msg report about PADDLE_ENFORCE, test=develop

* modify error msg report, about error type and grammar. test=develop

* modify error msg report, about PADDLE_ENFORCE to PADDLE_ENFORCE_XXX, test=develop

* modify error msg report, about PADDLE_ENFORCE to PADDLE_ENFORCE_XXX, and %s to %d, test=develop

* modified some error descriptions, test=develop
Parent 22501202
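The diff below applies one pattern over and over: replace the untyped boolean PADDLE_ENFORCE with a typed comparison macro (PADDLE_ENFORCE_EQ/NE/GT/...) plus a platform::errors category, and interpolate the offending names and values into the message. A minimal sketch of that conversion is shown here, assuming Paddle's enforce/errors headers are available; CheckBeta1Attr is a hypothetical helper used only for illustration, not part of this PR.

// Sketch only: assumes paddle/fluid/platform/enforce.h and errors.h are on the
// include path; CheckBeta1Attr is a made-up helper for illustration.
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h"

void CheckBeta1Attr(float expected_beta1, float actual_beta1) {
  // Old style: boolean condition, untyped message, no values reported.
  //   PADDLE_ENFORCE(expected_beta1 == actual_beta1, "beta1 must be same.");

  // New style: comparison macro + error category, with the concrete values
  // formatted into the message (%f for float, %d for int/bool, %s for string).
  PADDLE_ENFORCE_EQ(
      expected_beta1, actual_beta1,
      paddle::platform::errors::PreconditionNotMet(
          "All adam Op's attr(beta1) must be the same, but there are "
          "two different values: %f, %f.",
          expected_beta1, actual_beta1));
}

The specific error category (InvalidArgument, PreconditionNotMet, NotFound, ...) is chosen per call site in the hunks below.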
......@@ -50,18 +50,25 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
fused_scale2->inputs.end());
for (auto &out_node : fused_scale1->outputs) {
if (fused_scale2_in_nodes.count(out_node)) {
PADDLE_ENFORCE(out_node->IsCtrlVar(),
"The dependency var only should be ctrl var.");
PADDLE_ENFORCE_EQ(out_node->IsCtrlVar(), true,
platform::errors::PreconditionNotMet(
"In adam op pass, the dependency var(%s) only "
"should be ctrl var.",
out_node->Name()));
not_need_ctrl_var_nodes.insert(out_node);
}
}
for (auto &node : not_need_ctrl_var_nodes) {
// remove this node from the input op node.
PADDLE_ENFORCE(!node->inputs.empty(),
"The input should not be empty here.");
PADDLE_ENFORCE_EQ(
node->inputs.empty(), false,
platform::errors::PreconditionNotMet(
"Node(%s)'s input should not be empty here.", node->Name()));
auto op_node = node->inputs.front();
PADDLE_ENFORCE(op_node->IsOp());
PADDLE_ENFORCE_EQ(op_node->IsOp(), true,
platform::errors::PreconditionNotMet(
"Node(%s) should be an OP node.", op_node->Name()));
op_node->outputs.erase(
remove_if(
op_node->outputs.begin(), op_node->outputs.end(),
......@@ -85,7 +92,9 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
const std::unordered_map<std::string, std::vector<std::string>> &vars_set,
const std::unordered_map<std::string, std::string> &fused_vars_name,
const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const {
PADDLE_ENFORCE_GT(adam_ops.size(), static_cast<size_t>(0));
PADDLE_ENFORCE_GT(
adam_ops.size(), static_cast<size_t>(0),
platform::errors::InvalidArgument("No adam op in the graph."));
// Check attributions
// NOTE: If new attribution is added, the following code maybe need change.
......@@ -102,22 +111,58 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
int64_t, adam_ops[0]->Op()->GetAttr("min_row_size_to_use_multithread"));
for (auto &adam_op : adam_ops) {
PADDLE_ENFORCE_EQ(
beta1, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta1")));
beta1, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta1")),
platform::errors::PreconditionNotMet(
"All adam Op's attr(beta1) must be same, but there are two "
"different "
"value: %f, %f.",
beta1, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta1"))));
PADDLE_ENFORCE_EQ(
beta2, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta2")));
beta2, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta2")),
platform::errors::PreconditionNotMet(
"All adam Op's attr(beta2) must be same, but there are two "
"different "
"value: %f, %f.",
beta2, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("beta2"))));
PADDLE_ENFORCE_EQ(
epsilon, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("epsilon")));
epsilon, BOOST_GET_CONST(float, adam_op->Op()->GetAttr("epsilon")),
platform::errors::PreconditionNotMet(
"All adam Op's attr(epsilon) must be same, but there are two "
"different "
"value: %f, %f.",
epsilon,
BOOST_GET_CONST(float, adam_op->Op()->GetAttr("epsilon"))));
PADDLE_ENFORCE_EQ(
lazy_mode,
BOOST_GET_CONST(bool, adam_op->Op()->GetAttr("lazy_mode")));
lazy_mode, BOOST_GET_CONST(bool, adam_op->Op()->GetAttr("lazy_mode")),
platform::errors::PreconditionNotMet(
"All adam Op's attr(lazy_mode) must be same, but there are two "
"different "
"value: %d, %d.",
lazy_mode,
BOOST_GET_CONST(bool, adam_op->Op()->GetAttr("lazy_mode"))));
PADDLE_ENFORCE_EQ(
min_row_size_to_use_multithread,
BOOST_GET_CONST(int64_t, adam_op->Op()->GetAttr(
"min_row_size_to_use_multithread")));
"min_row_size_to_use_multithread")),
platform::errors::PreconditionNotMet(
"All adam Op's attr(min_row_size_to_use_multithread) must be "
"same, but there are two different value: %I64, %I64.",
min_row_size_to_use_multithread,
BOOST_GET_CONST(
int64_t,
adam_op->Op()->GetAttr("min_row_size_to_use_multithread"))));
PADDLE_ENFORCE_EQ(
op_role,
BOOST_GET_CONST(int, adam_op->Op()->GetAttr(
OpProtoAndCheckerMaker::OpRoleAttrName())));
OpProtoAndCheckerMaker::OpRoleAttrName())),
platform::errors::PreconditionNotMet(
"All adam Op's attr(op_role) must be same, but there are two "
"different "
"value: %d, %d.",
op_role,
BOOST_GET_CONST(int,
adam_op->Op()->GetAttr(
OpProtoAndCheckerMaker::OpRoleAttrName()))));
}
// NOTE: fused_var is only exist in scope, so the graph doesn't have
......@@ -154,7 +199,10 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
const std::string &fused_var_name,
const std::vector<ir::Node *> &adam_ops,
ir::Graph *graph) const {
PADDLE_ENFORCE_EQ(beta_name.size(), adam_ops.size());
PADDLE_ENFORCE_EQ(beta_name.size(), adam_ops.size(),
platform::errors::InvalidArgument(
"Beta name size(%d) must equal to adam op size(%d).",
beta_name.size(), adam_ops.size()));
const std::string scale_op_name = "scale";
// Get the scale_ops of dealing the adam's beta var.
......@@ -168,7 +216,9 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
return var_node->Var() &&
var_node->Var()->Name() == beta_1_pow_name;
});
PADDLE_ENFORCE(beta_pow_iter != adam_ops[i]->inputs.end());
PADDLE_ENFORCE_NE(beta_pow_iter, adam_ops[i]->inputs.end(),
platform::errors::NotFound(
"Can not find %s in adam ops.", beta_1_pow_name));
auto beta_pow_node = *beta_pow_iter;
auto scale_op_iter = std::find_if(
......@@ -176,11 +226,18 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
[&scale_op_name](ir::Node *op_node) -> bool {
return op_node->Op() && op_node->Op()->Type() == scale_op_name;
});
PADDLE_ENFORCE(scale_op_iter != beta_pow_node->outputs.end());
PADDLE_ENFORCE_NE(
scale_op_iter, beta_pow_node->outputs.end(),
platform::errors::NotFound("Can not find %s in beta pow node.",
scale_op_name));
scale_ops.emplace_back(*scale_op_iter);
}
PADDLE_ENFORCE_EQ(scale_ops.size(), beta_name.size());
PADDLE_ENFORCE_EQ(
scale_ops.size(), beta_name.size(),
platform::errors::PreconditionNotMet(
"Beta name size(%d) must equal to scale ops size(%d).",
beta_name.size(), scale_ops.size()));
VLOG(6) << "The number of scale op is " << scale_ops.size() << ".";
// Check attributions
// NOTE: If new attribution is added, the following code maybe need change.
......@@ -193,16 +250,40 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
BOOST_GET_CONST(bool, scale_ops[0]->Op()->GetAttr("bias_after_scale"));
for (auto &scale_op : scale_ops) {
PADDLE_ENFORCE_EQ(
scale, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("scale")));
scale, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("scale")),
platform::errors::PreconditionNotMet(
"All scale Op's attr(scale) must be same, but there are two "
"different "
"value: %f, %f.",
scale, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("scale"))));
PADDLE_ENFORCE_EQ(
bias, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("bias")));
bias, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("bias")),
platform::errors::PreconditionNotMet(
"All scale Op's attr(bias) must be same, but there are two "
"different "
"value: %f, %f.",
bias, BOOST_GET_CONST(float, scale_op->Op()->GetAttr("bias"))));
PADDLE_ENFORCE_EQ(
bias_after_scale,
BOOST_GET_CONST(bool, scale_op->Op()->GetAttr("bias_after_scale")));
BOOST_GET_CONST(bool, scale_op->Op()->GetAttr("bias_after_scale")),
platform::errors::PreconditionNotMet(
"All scale Op's attr(bias_after_scale) must be same, but there "
"are two different value: %d, %d.",
bias_after_scale,
BOOST_GET_CONST(bool,
scale_op->Op()->GetAttr("bias_after_scale"))));
PADDLE_ENFORCE_EQ(
op_role,
BOOST_GET_CONST(int, scale_op->Op()->GetAttr(
OpProtoAndCheckerMaker::OpRoleAttrName())));
OpProtoAndCheckerMaker::OpRoleAttrName())),
platform::errors::PreconditionNotMet(
"All scale Op's attr(op_role) must be same, but there are two "
"different "
"value: %d, %d.",
op_role,
BOOST_GET_CONST(int,
scale_op->Op()->GetAttr(
OpProtoAndCheckerMaker::OpRoleAttrName()))));
}
// NOTE: fused_var is only exist in scope, so the graph doesn't have
......
......@@ -37,7 +37,9 @@ class FuseMomentumOpPass : public FuseOptimizerOpPass {
const std::unordered_map<std::string, std::vector<std::string>> &vars_set,
const std::unordered_map<std::string, std::string> &fused_vars_name,
const std::vector<ir::Node *> &momentum_ops, ir::Graph *graph) const {
PADDLE_ENFORCE_GT(momentum_ops.size(), static_cast<size_t>(0));
PADDLE_ENFORCE_GT(
momentum_ops.size(), static_cast<size_t>(0),
platform::errors::InvalidArgument("Momentum ops must not be empyt."));
// Check attributions
// NOTE: If new attribution is added, the following code maybe need change.
......@@ -50,14 +52,32 @@ class FuseMomentumOpPass : public FuseOptimizerOpPass {
for (auto &momentum_op : momentum_ops) {
PADDLE_ENFORCE_EQ(
mu, BOOST_GET_CONST(float, momentum_op->Op()->GetAttr("mu")));
mu, BOOST_GET_CONST(float, momentum_op->Op()->GetAttr("mu")),
platform::errors::InvalidArgument(
"All momentum Op's attr(mu) must be same, but there are two "
"different "
"value: %f, %f.",
mu, BOOST_GET_CONST(float, momentum_op->Op()->GetAttr("mu"))));
PADDLE_ENFORCE_EQ(
use_nesterov,
BOOST_GET_CONST(bool, momentum_op->Op()->GetAttr("use_nesterov")));
BOOST_GET_CONST(bool, momentum_op->Op()->GetAttr("use_nesterov")),
platform::errors::InvalidArgument(
"All momentum Op's attr(use_nesterov) must be same, but there "
"are two different value: %d, %d.",
use_nesterov, BOOST_GET_CONST(bool, momentum_op->Op()->GetAttr(
"use_nesterov"))));
PADDLE_ENFORCE_EQ(
op_role,
BOOST_GET_CONST(int, momentum_op->Op()->GetAttr(
OpProtoAndCheckerMaker::OpRoleAttrName())));
OpProtoAndCheckerMaker::OpRoleAttrName())),
platform::errors::InvalidArgument(
"All momentum Op's attr(op_role) must be same, but there are two "
"different "
"value: %d, %d.",
op_role,
BOOST_GET_CONST(int,
momentum_op->Op()->GetAttr(
OpProtoAndCheckerMaker::OpRoleAttrName()))));
}
// NOTE: fused_var is only exist in scope, so the graph doesn't have
......
......@@ -41,10 +41,12 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
for (auto &node : topo_nodes) {
if (node->Op()->Type() == fuse_op_type) {
auto grad_name = node->Op()->Input(kGrad);
PADDLE_ENFORCE_EQ(grad_name.size(), static_cast<size_t>(1),
"The %s operator has multiple gradient input. Expected "
"it to only have one gradient input.",
fuse_op_type);
PADDLE_ENFORCE_EQ(
grad_name.size(), static_cast<size_t>(1),
platform::errors::InvalidArgument(
"The %s operator has multiple gradient input. Expected "
"it to only have one gradient input.",
fuse_op_type));
if (IsLoDTensorType(GetTypeOfVar(vars_info, grad_name[0]))) {
opt_nodes.emplace_back(node);
}
......@@ -96,7 +98,8 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
VLOG(6) << var_name << ": " << fused_var_name;
PADDLE_ENFORCE_EQ(
fused_var_set.count(fused_var_name), 0,
platform::errors::AlreadyExists("The fused variable already exists."));
platform::errors::AlreadyExists(
"The fused variable(%s) already exists.", fused_var_name));
fused_var_set.insert(fused_var_name);
fused_vars_name.emplace(var_name, fused_var_name);
}
......@@ -110,7 +113,10 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
result.Get<details::ParamsAndGrads>(details::kParamsAndDenseGrads);
PADDLE_ENFORCE_LE(
params_and_dense_grads.size(), aux_var_map.at(kGrad).size(),
"The number of dense gradients should be little than optimizer ops.");
platform::errors::InvalidArgument(
"The number of dense gradients(%d) should be "
"little than optimizer ops(%d).",
params_and_dense_grads.size(), aux_var_map.at(kGrad).size()));
std::unordered_set<std::string> opt_grad_set(aux_var_map.at(kGrad).size());
for (auto &p_g : params_and_dense_grads) {
......@@ -130,13 +136,14 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
// some gradient's name maybe changed.
if (new_grad_idx.size() == 0) {
if (!result.Has(details::kFusedGrads)) {
PADDLE_THROW(
PADDLE_THROW(platform::errors::PreconditionNotMet(
"The coalesce_grad_tensor_pass should "
"be called before this pass.");
"be called before this pass."));
}
auto &fused_grad = result.Get<details::FusedGrads>(details::kFusedGrads);
PADDLE_ENFORCE_NE(fused_grad.size(), 0,
"The fused gradient should not be empty.");
platform::errors::NotFound(
"The fused gradient should not be empty."));
if (fused_grad.size() > 1) {
// Note(chenweihang): Because the dtype of those gradients is not
// unified,so the number of fused gradients is more than one,
......@@ -146,8 +153,9 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
auto &fused_vars = result.Get<details::FusedVars>(details::kFusedVars);
auto iter =
std::find(fused_vars.begin(), fused_vars.end(), fused_grad.front());
PADDLE_ENFORCE_EQ(iter != fused_vars.end(), true,
"Not found the fused gradient variable.");
PADDLE_ENFORCE_EQ(
iter != fused_vars.end(), true,
platform::errors::NotFound("Not found the fused gradient variable."));
fused_vars_name[kGrad] = fused_grad.front();
// Sort the parameters and auxiliary variables according
......@@ -334,16 +342,24 @@ void FuseOptimizerOpPass::FuseGradientsToContinuousSpace(
// The Gradients should not be reused during memory optimization.
for (auto &grad_var_name : grads) {
auto iter = vars_info.find(grad_var_name);
PADDLE_ENFORCE_EQ(iter != vars_info.end(), true,
"The gradient variable %s is not found.", grad_var_name);
PADDLE_ENFORCE_EQ(!iter->second.empty(), true,
"The gradient var node %s is not found.", grad_var_name);
PADDLE_ENFORCE_NOT_NULL(iter->second.front()->Var(),
"The gradient var node is null.");
PADDLE_ENFORCE_EQ(
iter != vars_info.end(), true,
platform::errors::NotFound("The gradient variable %s is not found.",
grad_var_name));
PADDLE_ENFORCE_EQ(
!iter->second.empty(), true,
platform::errors::NotFound("The gradient var node %s is not found.",
grad_var_name));
PADDLE_ENFORCE_NOT_NULL(
iter->second.front()->Var(),
platform::errors::InvalidArgument("The gradient var(%s) node is null.",
grad_var_name));
PADDLE_ENFORCE_EQ(
IsLoDTensorType(iter->second.front()->Var()->GetType()), true,
"Currently the gradient type only should be LoDTensor when "
"fusing optimizer ops.");
platform::errors::InvalidArgument(
"Currently the gradient(%s) type only should be LoDTensor when "
"fusing optimizer ops.",
grad_var_name));
for (auto var : iter->second) {
pinned_var_set.insert(var->Var()->Name());
}
......@@ -382,11 +398,14 @@ const VarDesc *FuseOptimizerOpPass::GetVarDescFromVarsInfo(
const std::string &var_name) const {
auto grad_iter = vars_info.find(var_name);
PADDLE_ENFORCE_EQ(grad_iter != vars_info.end(), true,
"The gradient variable %s is not found.", var_name);
platform::errors::NotFound(
"The gradient variable %s is not found.", var_name));
PADDLE_ENFORCE_EQ(!grad_iter->second.empty(), true,
"The gradient var node %s is not found.", var_name);
platform::errors::NotFound(
"The gradient var node %s is not found.", var_name));
PADDLE_ENFORCE_NOT_NULL(grad_iter->second.front()->Var(),
"The gradient var node is null.");
platform::errors::InvalidArgument(
"The gradient var(%s) node is null.", var_name));
return grad_iter->second.front()->Var();
}
......@@ -428,8 +447,9 @@ void FuseOptimizerOpPass::SortParametersAndAuxVars(
const std::vector<std::pair<std::string, std::string>> &params_grads,
std::unordered_map<std::string, std::vector<std::string>> *aux_var_map,
std::vector<ir::Node *> *ops) const {
PADDLE_ENFORCE_NE(aux_var_map->count(kGrad), static_cast<size_t>(0),
"The gradient variable doesn‘t exist.");
PADDLE_ENFORCE_NE(
aux_var_map->count(kGrad), static_cast<size_t>(0),
platform::errors::NotFound("The gradient variable doesn‘t exist."));
auto &grad_vec = aux_var_map->at(kGrad);
std::vector<size_t> grad_sort_idx;
......@@ -437,8 +457,10 @@ void FuseOptimizerOpPass::SortParametersAndAuxVars(
for (auto &p_g : params_grads) {
auto iter = std::find(grad_vec.begin(), grad_vec.end(), p_g.second);
PADDLE_ENFORCE_EQ(iter != grad_vec.end(), true,
"%s is not found in gradient vector", p_g.second);
PADDLE_ENFORCE_EQ(
iter != grad_vec.end(), true,
platform::errors::NotFound(
"Parameter@Grad(%s) is not found in gradient vector.", p_g.second));
auto idx = std::distance(grad_vec.begin(), iter);
grad_sort_idx.emplace_back(idx);
}
......@@ -477,9 +499,10 @@ void FuseOptimizerOpPass::GetFusingVarNamesMap(
for (auto &var_n : aux_vars_name) {
auto arg_names = node->Op()->Input(var_n);
PADDLE_ENFORCE_EQ(arg_names.size(), static_cast<size_t>(1),
"The input variable of optimizer to be fused is "
"invalid. Excepted %s only has one %s input.",
node->Op()->Type(), var_n);
platform::errors::InvalidArgument(
"The input variable of optimizer to be fused is "
"invalid. Excepted %s only has one %s input.",
node->Op()->Type(), var_n));
(*aux_args_name)[var_n].emplace_back(arg_names[0]);
}
}
......@@ -525,10 +548,14 @@ void FuseOptimizerOpPass::InsertInputAndOutputForFusedOpNode(
auto deal_with_ctrl_vars = [&out_dep_vars, &not_useful_vars,
&fused_opt_node](ir::Node *ctr_var_node) {
PADDLE_ENFORCE_EQ(ctr_var_node->inputs.size(), 1,
"The control var node has nultiple inputs.");
platform::errors::InvalidArgument(
"The control var(%s) node has multiple inputs.",
ctr_var_node->Name()));
if (ctr_var_node->inputs.front() == fused_opt_node) {
PADDLE_ENFORCE_GT(ctr_var_node->outputs.size(), 0,
"The control var node has no output.");
PADDLE_ENFORCE_GT(
ctr_var_node->outputs.size(), 0,
platform::errors::InvalidArgument(
"The control var(%s) node has no output.", ctr_var_node->Name()));
auto output_ops = ctr_var_node->outputs;
output_ops.erase(std::remove_if(output_ops.begin(), output_ops.end(),
[&fused_opt_node](const ir::Node *node) {
......
......@@ -35,7 +35,9 @@ class FuseSgdOpPass : public FuseOptimizerOpPass {
const std::unordered_map<std::string, std::vector<std::string>> &vars_set,
const std::unordered_map<std::string, std::string> &fused_vars_name,
const std::vector<ir::Node *> &sgd_ops, ir::Graph *graph) const {
PADDLE_ENFORCE_GT(sgd_ops.size(), static_cast<size_t>(0));
PADDLE_ENFORCE_GT(
sgd_ops.size(), static_cast<size_t>(0),
platform::errors::InvalidArgument("SGD ops must not be empyt."));
// NOTE: fused_var is only exist in scope, so the graph doesn't have
// fused_var node.
......
......@@ -116,7 +116,10 @@ std::vector<OpHandleBase *> BufferSharedCrossOpMemoryReusePass::SortOp(
graph_view.BreadthFirstVisit(
[&](OpHandleBase *cur_op) { sorted_ops.emplace_back(cur_op); });
PADDLE_ENFORCE_EQ(sorted_ops.size(), graph_view.OpNumber(),
"There are unvisited ops");
platform::errors::InvalidArgument(
"Sorted ops size(%d) not equal to graph op size(%d). "
"There are unvisited ops.",
sorted_ops.size(), graph_view.OpNumber()));
return sorted_ops;
}
......@@ -181,7 +184,9 @@ void BufferSharedCrossOpMemoryReusePass::RunOnScopeIdx(size_t idx) const {
auto *out_node = *(out_nodes.begin());
auto *out_var =
dynamic_cast<VarHandle *>(&(out_node->Wrapper<VarHandleBase>()));
PADDLE_ENFORCE_NOT_NULL(out_var);
PADDLE_ENFORCE_NOT_NULL(
out_var, platform::errors::NotFound(
"Can not find a valid Var Node for Var %s.", out_arg));
// If out_arg is not reusable, skip it
if (!IsOutVarReusable(*out_var)) {
......@@ -269,7 +274,8 @@ size_t BufferSharedCrossOpMemoryReusePass::ResolveDependencyBetween(
auto op_dep = GetOpDep(prev_op, op);
if (op_dep == NodeDependency::kBefore) continue;
PADDLE_ENFORCE_EQ(op_dep, NodeDependency::kNoDep,
"The graph has circle, this may be a bug");
platform::errors::InvalidArgument(
"The graph has circle, this may be a bug."));
auto iter =
std::find_if(prev_op->Outputs().begin(), prev_op->Outputs().end(),
......@@ -316,9 +322,13 @@ size_t BufferSharedCrossOpMemoryReusePass::ResolveDependencyBetween(
}
void BufferSharedCrossOpMemoryReusePass::BuildOpDependencyMap() const {
PADDLE_ENFORCE(ops_.empty(), "ops_ must be initialized here");
PADDLE_ENFORCE(op_to_idx_.empty(), "op_to_idx_ must be initialized here");
PADDLE_ENFORCE(deps_.empty(), "deps_ must be initialized here");
PADDLE_ENFORCE_EQ(ops_.empty(), true, platform::errors::InvalidArgument(
"Ops must be initialized here."));
PADDLE_ENFORCE_EQ(
op_to_idx_.empty(), true,
platform::errors::InvalidArgument("Op to idx must be initialized here."));
PADDLE_ENFORCE_EQ(deps_.empty(), true, platform::errors::InvalidArgument(
"Deps must be initialized here."));
// Toposort ops
OpGraphView graph_view(ir::FilterByNodeWrapper<OpHandleBase>(*graph_));
......@@ -344,7 +354,10 @@ void BufferSharedCrossOpMemoryReusePass::BuildOpDependencyMap() const {
prev_preceding_ops.end());
}
});
PADDLE_ENFORCE_EQ(preceding_ops.size(), op_num);
PADDLE_ENFORCE_EQ(preceding_ops.size(), op_num,
platform::errors::InvalidArgument(
"Preceding ops size(%d) must equal to op num(%d).",
preceding_ops.size(), op_num));
// Find out ComputationOpHandles only
ops_.resize(scope_num);
......@@ -384,28 +397,43 @@ void BufferSharedCrossOpMemoryReusePass::BuildOpDependencyMap() const {
size_t BufferSharedCrossOpMemoryReusePass::OpIndex(
const ComputationOpHandle *op) const {
auto iter = op_to_idx_[op->GetScopeIdx()].find(op);
PADDLE_ENFORCE(iter != op_to_idx_[op->GetScopeIdx()].end());
PADDLE_ENFORCE_NE(iter, op_to_idx_[op->GetScopeIdx()].end(),
platform::errors::NotFound(
"Can not find op(%s) in op_to_idx_.", op->Name()));
return iter->second;
}
NodeDependency BufferSharedCrossOpMemoryReusePass::GetOpDep(
const ComputationOpHandle *op1, const ComputationOpHandle *op2) const {
PADDLE_ENFORCE_EQ(op1->GetScopeIdx(), op2->GetScopeIdx());
PADDLE_ENFORCE_EQ(op1->GetScopeIdx(), op2->GetScopeIdx(),
platform::errors::InvalidArgument(
"Op(%s) and op(%s) must in the same scope.",
op1->Name(), op2->Name()));
return deps_[op1->GetScopeIdx()][OpIndex(op1)][OpIndex(op2)];
}
void BufferSharedCrossOpMemoryReusePass::SetOpDep(
const ComputationOpHandle *op1, const ComputationOpHandle *op2,
NodeDependency dep) const {
PADDLE_ENFORCE_EQ(op1->GetScopeIdx(), op2->GetScopeIdx());
PADDLE_ENFORCE_EQ(op1->GetScopeIdx(), op2->GetScopeIdx(),
platform::errors::InvalidArgument(
"Op(%s) and op(%s) must in the same scope.",
op1->Name(), op2->Name()));
if (op1 == op2) {
PADDLE_ENFORCE(dep == NodeDependency::kSame);
PADDLE_ENFORCE_EQ(
dep, NodeDependency::kSame,
platform::errors::InvalidArgument(
"Set Same Op(%s) Dep, dep must be kSame type.", op1->Name()));
auto idx = OpIndex(op1);
deps_[op1->GetScopeIdx()][idx][idx] = NodeDependency::kSame;
} else {
auto idx1 = OpIndex(op1);
auto idx2 = OpIndex(op2);
PADDLE_ENFORCE(dep != NodeDependency::kSame && idx1 != idx2);
PADDLE_ENFORCE_EQ((dep != NodeDependency::kSame && idx1 != idx2), true,
platform::errors::InvalidArgument(
"Op(%s) and Op(%s) should not have same "
"index(%d), and dep should not kSame type.",
op1->Name(), op2->Name(), idx1));
deps_[op1->GetScopeIdx()][idx1][idx2] = dep;
deps_[op1->GetScopeIdx()][idx2][idx1] = ReverseNodeDependency(dep);
}
......
......@@ -57,7 +57,9 @@ void BufferSharedInplaceOpPass::Run(Graph *graph) const {
auto *op = *(pair.second.ops().begin());
const std::string &op_type = op->GetOp()->Type();
const framework::OpDesc *op_desc = op->Node()->Op();
PADDLE_ENFORCE_NOT_NULL(op_desc);
PADDLE_ENFORCE_NOT_NULL(
op_desc, platform::errors::NotFound("Op(%s) can not find opdesc.",
op->Name()));
auto &infer_inplace = OpInfoMap::Instance().Get(op_type).infer_inplace_;
if (!infer_inplace) {
......
......@@ -58,8 +58,12 @@ static int64_t GetMemorySize(
&vars,
const std::string &var_name) {
auto *var_desc = TryGetLatestVarDesc(vars.at(var_name));
PADDLE_ENFORCE_NOT_NULL(var_desc);
PADDLE_ENFORCE(IsLoDTensor(var_desc));
PADDLE_ENFORCE_NOT_NULL(
var_desc,
platform::errors::NotFound("Var(%s) can not find VarDesc.", var_name));
PADDLE_ENFORCE_EQ(IsLoDTensor(var_desc), true,
platform::errors::InvalidArgument(
"Var(%s) must be LoDTensor.", var_name));
auto dims = var_desc->GetShape();
return SizeOfType(var_desc->GetDataType()) *
std::accumulate(dims.begin(), dims.end(), static_cast<int64_t>(1),
......
......@@ -42,8 +42,10 @@ class MemOptVarInfo {
}
void SetRefCnt(size_t ref_cnt) {
PADDLE_ENFORCE_GE(ref_cnt, 1,
"Reference count must be larger than or equal to 1");
PADDLE_ENFORCE_GE(
ref_cnt, 1,
platform::errors::InvalidArgument(
"Reference count(%d) must be larger than or equal to 1.", ref_cnt));
ref_cnt_ = ref_cnt;
runtime_ref_cnt_ = ref_cnt;
}
......
......@@ -66,7 +66,11 @@ bool MemoryReusePass::TryReuseVar(details::VarHandle *in_var,
details::VarHandle *out_var) const {
auto *op =
dynamic_cast<details::ComputationOpHandle *>(out_var->GeneratedOp());
PADDLE_ENFORCE_NOT_NULL(op);
PADDLE_ENFORCE_NOT_NULL(
op,
platform::errors::InvalidArgument(
"Var(%s) have no GeneratedOp, or it's op is not ComputationOpHandle.",
out_var->Name()));
if (IsVarPairReusable(*in_var, *out_var)) {
AddReuseVar(op, in_var, out_var);
return true;
......@@ -91,10 +95,13 @@ VarDesc *MemoryReusePass::GetVarDesc(const details::VarHandle &var) const {
size_t scope_idx = var.scope_idx();
auto iter = var_descs_[scope_idx].find(var_name);
if (iter == var_descs_[scope_idx].end()) {
PADDLE_ENFORCE((*all_vars_)[scope_idx].count(var_name),
"Variable %s not found", var_name);
PADDLE_ENFORCE_NE(
(*all_vars_)[scope_idx].count(var_name), 0,
platform::errors::NotFound("Variable %s not found.", var_name));
auto *desc = TryGetLatestVarDesc((*all_vars_)[scope_idx].at(var_name));
PADDLE_ENFORCE_NOT_NULL(desc);
PADDLE_ENFORCE_NOT_NULL(
desc,
platform::errors::NotFound("Var(%s) can not find VarDesc.", var_name));
var_descs_[scope_idx].emplace(var_name, desc);
return desc;
} else {
......@@ -119,7 +126,9 @@ void MemoryReusePass::CollectShareTensorBufferOpHandles() const {
if (share_buffer_op != nullptr) {
auto *compute_op =
details::GetUniquePendingComputationOpHandle(share_buffer_op);
PADDLE_ENFORCE(ops_.count(compute_op) == 0);
PADDLE_ENFORCE_EQ(
ops_.count(compute_op), 0,
platform::errors::AlreadyExists("Compute op already exists."));
ops_.emplace(compute_op, share_buffer_op);
}
}
......@@ -227,8 +236,11 @@ bool MemoryReusePass::IsInVarReusable(const details::VarHandle &in_var) const {
*/
bool MemoryReusePass::IsOutVarReusable(
const details::VarHandle &out_var) const {
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<const details::ComputationOpHandle *>(
out_var.GeneratedOp()));
PADDLE_ENFORCE_NOT_NULL(
dynamic_cast<const details::ComputationOpHandle *>(out_var.GeneratedOp()),
platform::errors::InvalidArgument(
"Var(%s) have no GeneratedOp, or it's op is not ComputationOpHandle.",
out_var.Name()));
const auto out_name = out_var.Name();
if (out_name == kEmptyVarName) {
return false;
......@@ -236,9 +248,10 @@ bool MemoryReusePass::IsOutVarReusable(
// out_var must be the first version!!!
auto out_var_iter = (*all_vars_)[out_var.scope_idx()].find(out_name);
PADDLE_ENFORCE(out_var_iter != (*all_vars_)[out_var.scope_idx()].end() &&
!out_var_iter->second.empty(),
"Cannot find variable %s", out_name);
PADDLE_ENFORCE_EQ(
(out_var_iter != (*all_vars_)[out_var.scope_idx()].end() &&
!out_var_iter->second.empty()),
true, platform::errors::NotFound("Cannot find variable %s.", out_name));
if (out_var_iter->second[0] != &out_var) {
return false;
......@@ -282,7 +295,11 @@ bool MemoryReusePass::IsVarPairReusable(
const details::VarHandle &in_var, const details::VarHandle &out_var) const {
auto *op =
dynamic_cast<const details::ComputationOpHandle *>(out_var.GeneratedOp());
PADDLE_ENFORCE_NOT_NULL(op);
PADDLE_ENFORCE_NOT_NULL(
op,
platform::errors::InvalidArgument(
"Var(%s) have no GeneratedOp, or it's op is not ComputationOpHandle.",
out_var.Name()));
const auto in_name = in_var.Name();
if (in_name == out_var.Name()) {
......@@ -308,8 +325,10 @@ bool MemoryReusePass::IsVarPairReusable(
void MemoryReusePass::AddReuseVar(details::ComputationOpHandle *op,
details::VarHandle *in_var,
details::VarHandle *out_var) const {
PADDLE_ENFORCE((*var_infos_)[op->GetScopeIdx()].count(in_var->Name()) > 0,
"%s does not in mem-opt var infos", in_var->Name());
PADDLE_ENFORCE_GT(
(*var_infos_)[op->GetScopeIdx()].count(in_var->Name()), 0,
platform::errors::NotFound("Var(%s) does not in mem opt var infos.",
in_var->Name()));
if (ops_.count(op) == 0) {
InsertShareTensorBufferOpHandleToGraph(op);
......@@ -349,7 +368,10 @@ void MemoryReusePass::UpdateLastLiveOpOfVar(details::ComputationOpHandle *op,
if (out_var_op_iter == (*last_live_ops_of_vars_)[scope_idx].end()) {
last_live_op_of_in_var = op;
} else {
PADDLE_ENFORCE(!out_var_op_iter->second.ops().empty());
PADDLE_ENFORCE_EQ(
out_var_op_iter->second.ops().empty(), false,
platform::errors::InvalidArgument(
"Var(%s)'s last live op should not empty.", out_var->Name()));
last_live_op_of_in_var = *(out_var_op_iter->second.ops().begin());
}
......@@ -359,8 +381,9 @@ void MemoryReusePass::UpdateLastLiveOpOfVar(details::ComputationOpHandle *op,
last_live_ops_of_in_var->insert(last_live_op_of_in_var);
auto in_var_info_iter = (*var_infos_)[scope_idx].find(in_var->Name());
PADDLE_ENFORCE(in_var_info_iter != (*var_infos_)[scope_idx].end(),
"Cannot find variable %s", in_var->Name());
PADDLE_ENFORCE_NE(
in_var_info_iter, (*var_infos_)[scope_idx].end(),
platform::errors::NotFound("Cannot find variable %s.", in_var->Name()));
in_var_info_iter->second->SetRefCnt(1);
}
......
......@@ -39,7 +39,7 @@ void OpGraphView::Build(const std::vector<details::OpHandleBase *> &ops) {
}
PADDLE_ENFORCE(
preceding_ops_.size() == ops.size() && pending_ops_.size() == ops.size(),
"There are duplicate ops in graph.");
platform::errors::InvalidArgument("There are duplicate ops in graph."));
}
std::unordered_set<details::OpHandleBase *> OpGraphView::AllOps() const {
......@@ -56,8 +56,10 @@ bool OpGraphView::HasOp(details::OpHandleBase *op) const {
}
void OpGraphView::EnforceHasOp(details::OpHandleBase *op) const {
PADDLE_ENFORCE(HasOp(op), "Cannot find op %s in OpGraphView",
op == nullptr ? "nullptr" : op->DebugString());
PADDLE_ENFORCE_EQ(HasOp(op), true,
platform::errors::NotFound(
"Cannot find op %s in OpGraphView.",
op == nullptr ? "nullptr" : op->DebugString()));
}
const std::unordered_set<details::OpHandleBase *> &OpGraphView::PendingOps(
......
......@@ -127,9 +127,13 @@ void OpGraphView::BreadthFirstVisit(Callback &&callback) const {
}
}
PADDLE_ENFORCE_EQ(num_calls, op_num, "There are unvisited ops");
PADDLE_ENFORCE_EQ(visited_ops.size(), op_num, "There are unvisited ops");
PADDLE_ENFORCE(op_deps.empty(), "There are unvisited ops");
PADDLE_ENFORCE_EQ(num_calls, op_num, platform::errors::InvalidArgument(
"There are unvisited ops."));
PADDLE_ENFORCE_EQ(
visited_ops.size(), op_num,
platform::errors::InvalidArgument("There are unvisited ops."));
PADDLE_ENFORCE_EQ(op_deps.empty(), true, platform::errors::InvalidArgument(
"There are unvisited ops."));
}
} // namespace ir
......
......@@ -77,11 +77,15 @@ class ShrinkDepsOpFunctor {
const std::vector<details::OpHandleBase *> &ops) const {
std::unordered_map<details::OpHandleBase *, size_t> op_to_idx;
for (size_t i = 0; i < ops.size(); ++i) {
PADDLE_ENFORCE(graph_.HasOp(ops[i]), "Op does not exist in graph");
PADDLE_ENFORCE_EQ(
graph_.HasOp(ops[i]), true,
platform::errors::InvalidArgument("Op does not exist in graph."));
op_to_idx[ops[i]] = i;
}
PADDLE_ENFORCE(op_to_idx.size() == ops.size(), "Duplicate ops");
PADDLE_ENFORCE_EQ(
op_to_idx.size(), ops.size(),
platform::errors::InvalidArgument("Graph may have duplicate ops."));
std::vector<std::vector<RelationShip>> ret(ops.size());
for (auto &e : ret) {
......@@ -247,9 +251,9 @@ ExtractComputationOpFromLastLivedVar(details::VarHandle *var, size_t scope_idx,
return {};
}
PADDLE_ENFORCE_EQ(
computation_ops.empty(), false,
platform::errors::InvalidArgument("Computation ops should not be empty"));
PADDLE_ENFORCE_EQ(computation_ops.empty(), false,
platform::errors::InvalidArgument(
"Computation ops should not be empty."));
// stage four. Try to shrink computation op if they depend on each other.
// Get the smallest set of the most ops.
......@@ -263,8 +267,9 @@ void ReferenceCountPass::ApplyImpl(ir::Graph *graph) const {
Get<std::vector<LastLiveOpsOfVars>>(kLastLiveOpsOfVars);
PADDLE_ENFORCE(last_live_ops_of_vars.empty() && var_infos.empty(),
"Last Live Ops and Reference Counts of vars should be "
"initialized at here.");
platform::errors::InvalidArgument(
"Last live ops and reference counts of vars should be "
"initialized at here."));
const auto &vars = graph->Get<details::GraphVars>(details::kGraphVars);
......@@ -304,11 +309,15 @@ void ReferenceCountPass::ApplyImpl(ir::Graph *graph) const {
auto &var_name = name_var_pair.first;
auto &var_handles = name_var_pair.second;
PADDLE_ENFORCE_EQ(var_desc->Name(), var_name);
PADDLE_ENFORCE_EQ(
var_handles.empty(), false,
platform::errors::InvalidArgument("Variable %s not found", var_name));
var_desc->Name(), var_name,
platform::errors::InvalidArgument(
"A Var, it's VarName(%s) and DescName(%s) not same.", var_name,
var_desc->Name()));
PADDLE_ENFORCE_EQ(var_handles.empty(), false,
platform::errors::InvalidArgument(
"Variable %s not found.", var_name));
auto last_ver_var = var_handles.back();
if (last_ver_var->Node()->IsCtrlVar()) {
......@@ -327,12 +336,13 @@ void ReferenceCountPass::ApplyImpl(ir::Graph *graph) const {
continue;
}
PADDLE_ENFORCE_EQ(status, LastLiveOpSearchStatus::kSuccess,
platform::errors::InvalidArgument(
"Status(%d) must be success.", status));
PADDLE_ENFORCE_EQ(
status, LastLiveOpSearchStatus::kSuccess,
platform::errors::InvalidArgument("status must be success"));
PADDLE_ENFORCE_EQ(result.empty(), false,
platform::errors::NotFound(
"Last living ops of %s cannot be empty", var_name));
result.empty(), false,
platform::errors::NotFound("Last living ops of %s cannot be empty.",
var_name));
std::string last_live_ops_log_str;
for (auto &each_ret : result) {
......
......@@ -45,7 +45,9 @@ class AllReduceDepsPass : public ir::Pass {
for (size_t i = 0; i < all_reduce_op_handles.size(); ++i) {
auto op_handle =
dynamic_cast<details::NCCLOpHandleBase*>(all_reduce_op_handles[i]);
PADDLE_ENFORCE(op_handle, "op_handle must be NCCLOpHandleBase");
PADDLE_ENFORCE_NOT_NULL(op_handle,
platform::errors::InvalidArgument(
"Op handle must be NCCLOpHandleBase."));
op_handle->SetRunEnv(i, use_hierarchical_allreduce);
}
#endif
......@@ -95,7 +97,9 @@ class AllReduceDepsPass : public ir::Pass {
}
}
PADDLE_ENFORCE_NE(next_ready_ops.size(), 0, "There maybe have a cycle.");
PADDLE_ENFORCE_NE(
next_ready_ops.size(), 0,
platform::errors::InvalidArgument("There may be a cycle."));
ready_ops.clear();
std::swap(ready_ops, next_ready_ops);
GetSortedAllReduceOps(ready_ops, &all_reduce_op_handles);
......@@ -122,18 +126,25 @@ class AllReduceDepsPass : public ir::Pass {
// NOTE(zcd): For distributed training, it is important to keep the order of
// allReduce on each node consistent. Otherwise, hang may occur.
// Sort the current_all_reduce_op_handles according to the name of input.
sort(current_all_reduce_op_handles.begin(),
current_all_reduce_op_handles.end(),
[](const details::OpHandleBase* left,
const details::OpHandleBase* right) -> bool {
auto left_in_vars =
details::DynamicCast<details::VarHandle>(left->Inputs());
auto right_in_vars =
details::DynamicCast<details::VarHandle>(right->Inputs());
PADDLE_ENFORCE_GT(left_in_vars.size(), 0);
PADDLE_ENFORCE_GT(right_in_vars.size(), 0);
return left_in_vars[0]->Name() > right_in_vars[0]->Name();
});
sort(
current_all_reduce_op_handles.begin(),
current_all_reduce_op_handles.end(),
[](const details::OpHandleBase* left,
const details::OpHandleBase* right) -> bool {
auto left_in_vars =
details::DynamicCast<details::VarHandle>(left->Inputs());
auto right_in_vars =
details::DynamicCast<details::VarHandle>(right->Inputs());
PADDLE_ENFORCE_GT(left_in_vars.size(), 0,
platform::errors::InvalidArgument(
"OpHandle(%s) inputs size must greater than 0.",
left->Name()));
PADDLE_ENFORCE_GT(right_in_vars.size(), 0,
platform::errors::InvalidArgument(
"OpHandle(%s) inputs size must greater than 0.",
right->Name()));
return left_in_vars[0]->Name() > right_in_vars[0]->Name();
});
all_reduce_op_handles->insert(all_reduce_op_handles->end(),
current_all_reduce_op_handles.begin(),
......@@ -170,7 +181,10 @@ class AllReduceDepsPass : public ir::Pass {
break;
}
}
PADDLE_ENFORCE(find_valid_input, "Doesn't find valid input.");
PADDLE_ENFORCE_EQ(
find_valid_input, true,
platform::errors::NotFound(
"In OpHandle(%s) Doesn't find valid input.", op->Name()));
}
VLOG(10) << out2.str();
if (grads_of_stale_program != all_reduce_op_handles.size()) {
......
......@@ -179,9 +179,10 @@ class BackWardOpDepsPass : public ir::Pass {
// Currently, we assume that once gradient is generated, it can be
// broadcast, and each gradient is only broadcast once.
auto backward_vars = details::GetOpRoleVarsOrEmpty(op_desc);
PADDLE_ENFORCE_EQ(node->IsWrappedBy<details::OpHandleBase>(), true,
platform::errors::InvalidArgument(
"Node must be wrapped by OpHandleBase"));
PADDLE_ENFORCE_EQ(
node->IsWrappedBy<details::OpHandleBase>(), true,
platform::errors::InvalidArgument(
"Node(%s) must be wrapped by OpHandleBase.", node->Name()));
backward_op_handles->emplace_back(&node->Wrapper<details::OpHandleBase>());
......
......@@ -64,9 +64,10 @@ class FuseAllReduceOpPass : public ir::Pass {
PADDLE_ENFORCE_EQ(
all_reduce_ops.size(), grads.size(),
platform::errors::Unimplemented(
"The number of all_reduce OpHandle is not equal to the "
"number of grads. Maybe some gradients are sparse type, "
"it is not supported currently."));
"The number of all_reduce OpHandle(%d) is not equal to the "
"number of grads(%d). Maybe some gradients are sparse type, "
"it is not supported currently.",
all_reduce_ops.size(), grads.size()));
auto &group_params_grads = graph->Get<details::GroupParamsAndGrads>(
details::kGroupParamsAndDenseGrads);
......@@ -79,7 +80,10 @@ class FuseAllReduceOpPass : public ir::Pass {
for (auto &group_p_g : group_params_grads) {
size_t group_size = group_p_g.size();
PADDLE_ENFORCE_GT(group_size, static_cast<size_t>(0));
PADDLE_ENFORCE_GT(
group_size, static_cast<size_t>(0),
platform::errors::InvalidArgument(
"Parameter and Parameter@grad in one group, must not be empty."));
std::vector<ir::Node *> group_all_reduce_ops;
group_all_reduce_ops.reserve(group_size);
for (auto &p_g : group_p_g) {
......@@ -103,26 +107,40 @@ class FuseAllReduceOpPass : public ir::Pass {
all_reduce_ops.reserve(grads.size());
for (auto &node : result.Nodes()) {
if (node->IsOp()) {
PADDLE_ENFORCE(node->IsWrappedBy<details::OpHandleBase>());
PADDLE_ENFORCE_EQ(
node->IsWrappedBy<details::OpHandleBase>(), true,
platform::errors::InvalidArgument(
"Op Node(%s) should Wrapped by OpHandleBase.", node->Name()));
auto *all_reduce_op_handle = dynamic_cast<details::AllReduceOpHandle *>(
&node->Wrapper<details::OpHandleBase>());
if (all_reduce_op_handle) {
#if defined(PADDLE_WITH_DGC)
PADDLE_ENFORCE_NE(
all_reduce_op_handle->Name(), "sparse_all_reduce",
"DGC doesn't support fuse for now, if you want to use DGC "
"you need set strategy.fuse_all_reduce_ops = False.");
platform::errors::InvalidArgument(
"DGC doesn't support fuse for now, if you want to use DGC "
"you need set strategy.fuse_all_reduce_ops = False."));
#endif
auto inputs = details::DynamicCast<details::VarHandle>(
all_reduce_op_handle->Inputs());
PADDLE_ENFORCE_EQ(inputs.size(), num_place);
PADDLE_ENFORCE_EQ(inputs.size(), num_place,
platform::errors::InvalidArgument(
"The input size(%d) of all reduce op must "
"equal to place cnt(%d)!",
inputs.size(), num_place));
// The inputs' name should be the same.
auto &grad_name = inputs[0]->name();
for (size_t i = 1; i < inputs.size(); ++i) {
PADDLE_ENFORCE_EQ(inputs[i]->name(), grad_name,
"The input name should be the same.");
PADDLE_ENFORCE_EQ(
inputs[i]->name(), grad_name,
platform::errors::InvalidArgument(
"The input name should be the same.diff name: %s %s.",
inputs[i]->name(), grad_name));
}
PADDLE_ENFORCE_NE(grads.count(grad_name), static_cast<size_t>(0));
PADDLE_ENFORCE_NE(
grads.count(grad_name), static_cast<size_t>(0),
platform::errors::InvalidArgument(
"Parameter@grad(%s) must in grad set.", grad_name));
all_reduce_ops.emplace(grad_name, node);
}
}
......
......@@ -24,7 +24,10 @@ namespace ir {
class SSAGraghBuilderWithChecker : public ir::Pass {
protected:
void ApplyImpl(ir::Graph *graph) const override {
PADDLE_ENFORCE(IsValidGraph(graph));
PADDLE_ENFORCE_EQ(
IsValidGraph(graph), true,
platform::errors::InvalidArgument(
"In SSAGraghBuilderWithChecker, invalid Graph input."));
}
bool IsValidGraph(const ir::Graph *graph) const {
......
......@@ -163,7 +163,13 @@ void MultiDevSSAGraphBuilderBase::Init() const {
nccl_ctxs_ = multi_nccl_ctxs_->DefaultFlatCtx();
}
#endif
PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size());
PADDLE_ENFORCE_EQ(
places_.size(), local_scopes_.size(),
platform::errors::InvalidArgument(
"Places size and LocalScopes not equal "
"Places size(%d), LocalScopes size(%d) "
"If use multi devices, Places size must equas to LocalScopes size.",
places_.size(), local_scopes_.size()));
}
void MultiDevSSAGraphBuilderBase::ApplyImpl(ir::Graph *graph) const {
......@@ -500,7 +506,11 @@ void MultiDevSSAGraphBuilderBase::CreateAllReduceOp(ir::Graph *result,
SetCommunicationContext(op_handle, places_[i]);
auto &vars = result->Get<details::GraphVars>(details::kGraphVars)[i][og];
PADDLE_ENFORCE(!vars.empty());
PADDLE_ENFORCE_EQ(vars.empty(), false,
platform::errors::InvalidArgument(
"Can not find Var(%s) in Place[%d] "
"Paddle Can not add AllReduce OP for Var(%s).",
og, i, og));
auto &prev_grad = vars.back();
op_handle->AddInput(prev_grad);
VLOG(10) << "all_reduce_op_handle add input " << prev_grad->DebugString();
......@@ -566,7 +576,11 @@ details::VarHandle *MultiDevSSAGraphBuilderBase::CreateReduceOp(
auto &p = places_[i];
SetCommunicationContext(op_handle, p);
auto &vars = result->Get<details::GraphVars>(details::kGraphVars)[i][og];
PADDLE_ENFORCE(!vars.empty());
PADDLE_ENFORCE_EQ(vars.empty(), false,
platform::errors::InvalidArgument(
"Can not find Var(%s) in Place[%d] "
"Paddle Can not add Reduce OP for Var(%s).",
og, i, og));
auto &prev_grad = vars.back();
op_handle->AddInput(prev_grad);
}
......@@ -590,7 +604,11 @@ bool MultiDevSSAGraphBuilderBase::IsScaleLossOp(ir::Node *node) const {
bool MultiDevSSAGraphBuilderBase::IsSparseGradient(
const std::string &og) const {
PADDLE_ENFORCE(all_vars_.count(og) != 0);
PADDLE_ENFORCE_NE(all_vars_.count(og), 0,
platform::errors::InvalidArgument(
"Can not find Var(%s) in VarDescs "
"Paddle Can not add Collective OP for Var(%s).",
og, og));
return all_vars_.at(og)->GetType() == proto::VarType::SELECTED_ROWS;
}
......@@ -641,10 +659,20 @@ int BalanceVarSSAGraphBuilder::GetOpDeviceID(ir::Node *node) const {
std::vector<std::string>,
node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
PADDLE_ENFORCE_EQ(param_grad.size(), 2U);
PADDLE_ENFORCE_EQ(
param_grad.size(), 2U,
platform::errors::InvalidArgument(
"In Node %s, the size of attribute %s must be 2, include Parameter "
"and Parameter@Grad.",
node->Name(), OpProtoAndCheckerMaker::OpRoleVarAttrName()));
int dev_id = GetVarDeviceID(param_grad[1]);
PADDLE_ENFORCE_NE(dev_id, -1, "dev_id should not be -1.[%s, %s, %s]",
node->Op()->Type(), param_grad[0], param_grad[1]);
PADDLE_ENFORCE_NE(dev_id, -1, platform::errors::NotFound(
"Can not find Device ID, for NodeName:%s, "
"NodeType:%s, Param:%s, Param@Grad:%s"
"For this fault, you can consult the "
"Paddle technical personnel for answer ",
node->Name(), node->Op()->Type(),
param_grad[0], param_grad[1]));
return dev_id;
}
......@@ -654,10 +682,16 @@ size_t BalanceVarSSAGraphBuilder::GetAppropriateDeviceID(
for (auto var_name : var_names) {
if (all_vars_.find(var_name) == all_vars_.end()) continue;
auto var_desc = all_vars_.at(var_name);
PADDLE_ENFORCE_NOT_NULL(var_desc);
PADDLE_ENFORCE_NOT_NULL(var_desc,
platform::errors::NotFound(
"Can not find Var(%s) in Var Desc.", var_name));
auto dim = framework::make_ddim(var_desc->GetShape());
int64_t numel = framework::product(dim);
PADDLE_ENFORCE_GT(numel, 0);
PADDLE_ENFORCE_GT(numel, 0,
platform::errors::InvalidArgument(
"The numel of Var(%s) must greater than 0"
"Please check your code,about Var(%s) Shape.",
var_name, var_name));
numel_sum += numel;
}
......@@ -736,7 +770,12 @@ int ReduceSSAGraphBuilder::GetOpDeviceID(
std::vector<std::string>,
node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
PADDLE_ENFORCE_EQ(param_grad.size(), 2U);
PADDLE_ENFORCE_EQ(
param_grad.size(), 2U,
platform::errors::InvalidArgument(
"In Node %s, The size of attribute %s must be 2, include Parameter "
"and Parameter@Grad.",
node->Name(), OpProtoAndCheckerMaker::OpRoleVarAttrName()));
int dev_id = GetVarDeviceID(param_grad[1]);
if (dev_id == -1) {
......@@ -798,7 +837,12 @@ std::vector<ir::Node *> ReduceSSAGraphBuilder::SortForReduceMode(
}
}
PADDLE_ENFORCE_EQ(sorted_ops.size(), topo_ops.size());
PADDLE_ENFORCE_EQ(sorted_ops.size(), topo_ops.size(),
platform::errors::InvalidArgument(
"Sorted ops calc error!"
"The result for sorted ops size(%d) must be "
"equal to topo ops size(%d).",
sorted_ops.size(), topo_ops.size()));
ResetState();
return sorted_ops;
......@@ -820,14 +864,23 @@ bool DistSSAGraphBuilder::DealWithSpecialOp(ir::Graph *result,
bool insert_op = false;
if (OpHaveRole(*node, OpRole::kRPC)) {
int op_dev_id = CreateRPCOp(result, node);
PADDLE_ENFORCE(op_dev_id != -1,
"Can not schedule the RPC operator to the right place.");
PADDLE_ENFORCE_NE(op_dev_id, -1, platform::errors::InvalidArgument(
"Can not schedule the RPC operator to "
"the right place. NodeName:%s.",
node->Name()));
if (node->Op()->Type() == "recv") {
auto recv_vars_attr =
BOOST_GET_CONST(std::vector<std::string>,
node->Op()->GetNullableAttr(
OpProtoAndCheckerMaker::OpRoleVarAttrName()));
PADDLE_ENFORCE(recv_vars_attr.size() == 2UL); // [parameter, gradient]
PADDLE_ENFORCE_EQ(
recv_vars_attr.size(), 2UL,
platform::errors::InvalidArgument(
"In Node %s, the size of attribute %s must be 2, include "
"Parameter and Parameter@Grad.",
node->Name(),
OpProtoAndCheckerMaker::OpRoleVarAttrName())); // [parameter,
// gradient]
if (recv_vars_attr[0].find(".block") == std::string::npos) {
bcast_var_name_set_[op_dev_id].emplace(recv_vars_attr[0]);
}
......@@ -879,8 +932,9 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const {
if (node->Op()->Type() == "send") {
// TODO(paddle-dev): getting the first var is not safe.
op_dev_id = GetVarDeviceID(node->inputs[0]->Name());
PADDLE_ENFORCE(!ir::IsControlDepVar(*node->inputs[0]),
"This hack no longer holds, please fix.");
PADDLE_ENFORCE_EQ(ir::IsControlDepVar(*node->inputs[0]), false,
platform::errors::InvalidArgument(
"This hack no longer holds, please fix."));
// the variable name which contains .block means it was split by
// split_byref op
if (strategy_.reduce_ ==
......@@ -893,7 +947,12 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const {
auto send_param_grad = BOOST_GET_CONST(
std::vector<std::string>,
node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U);
PADDLE_ENFORCE_EQ(
send_param_grad.size(), 2U,
platform::errors::InvalidArgument(
"In Node %s, the size of attribute %s must be 2, include "
"Parameter and Parameter@Grad.",
node->Name(), OpProtoAndCheckerMaker::OpRoleVarAttrName()));
op_dev_id = GetAppropriateDeviceID({send_param_grad[1]});
VLOG(10) << "send grad " << input_var_names[0] << " origin "
<< send_param_grad[1] << " place: " << op_dev_id;
......@@ -926,9 +985,10 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const {
op_dev_id = 0;
}
PADDLE_ENFORCE(op_dev_id != -1, "can not find the right place for rpc op: %s",
node->Op()->Type());
PADDLE_ENFORCE_NE(
op_dev_id, -1,
platform::errors::NotFound("Can not find the right place for rpc op: %s.",
node->Op()->Type()));
// Create fetch_barrier op handle to enable output on all devices.
// **NOTE** fetch_barrier should output variables list same as recv op does.
if (node->Op()->Type() == "fetch_barrier") {
......@@ -956,7 +1016,10 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const {
int outvar_dev_id = op_dev_id;
if (node->Op()->Type() == "fetch_barrier") {
outvar_dev_id = GetVarDeviceID(output->Name());
PADDLE_ENFORCE_NE(outvar_dev_id, -1, "output name %s", output->Name());
PADDLE_ENFORCE_NE(outvar_dev_id, -1,
platform::errors::NotFound(
"Can not find the right place for the var: %s.",
output->Name()));
}
p = places_[outvar_dev_id];
ir::Node *new_node = nullptr;
......@@ -1007,13 +1070,14 @@ int DistSSAGraphBuilder::CreateDistTrainOp(ir::Graph *result,
} else {
LOG(ERROR) << "got unexpected dist op: " << node->Op()->Type();
PADDLE_THROW(
"the distribute training related op should be in [split_byref, "
"concat].");
platform::errors::Unimplemented("The distribute training related op "
"should be in [split_byref, concat]."));
}
PADDLE_ENFORCE(op_dev_id != -1,
"can not find right place for distributed op: %s",
node->Op()->Type());
PADDLE_ENFORCE_NE(op_dev_id, -1,
platform::errors::NotFound(
"Can not find right place for distributed op: %s.",
node->Op()->Type()));
CreateComputationalOp(result, node, op_dev_id);
return op_dev_id;
......
......@@ -28,7 +28,10 @@ class SSAGraghBuilderWithPrinterPass : public ir::Pass {
void ApplyImpl(ir::Graph *graph) const override {
std::unique_ptr<std::ostream> fout(
new std::ofstream(Get<std::string>(kGraphvizPath)));
PADDLE_ENFORCE(fout->good());
PADDLE_ENFORCE_EQ(
fout->good(), true,
platform::errors::Unavailable("Open file fail! kGraphvizPath = %s.",
Get<std::string>(kGraphvizPath)));
if (Has("graph_printer")) {
Get<GraphvizSSAGraphPrinter>("graph_printer").Print(*graph, *fout);
} else {
......
......@@ -54,11 +54,16 @@ class SequentialExecutionPass : public ir::Pass {
if (!node->IsOp()) continue;
std::unordered_set<ir::Node *> preceding_ops;
for (auto *in : node->inputs) {
PADDLE_ENFORCE(in->IsVar(),
"Preceding Node of Op Nodes must be Var Node");
PADDLE_ENFORCE_EQ(
in->IsVar(), true,
platform::errors::InvalidArgument(
"Preceding Node(%s) of Op Nodes must be Var Node.",
in->Name()));
if (in->inputs.empty()) continue;
PADDLE_ENFORCE(in->inputs.size() == 1 && in->inputs[0]->IsOp(),
"Preceding Op Node of Var Node must be unique");
PADDLE_ENFORCE_EQ((in->inputs.size() == 1 && in->inputs[0]->IsOp()),
true,
platform::errors::InvalidArgument(
"Preceding Op Node of Var Node must be unique."));
preceding_ops.insert(in->inputs[0]);
pending_ops[in->inputs[0]].insert(node);
}
......@@ -72,15 +77,18 @@ class SequentialExecutionPass : public ir::Pass {
ir::Node *found_node = nullptr;
for (auto *node : ready_ops) {
if (IsSameOpDesc(op_desc, node->Op())) {
PADDLE_ENFORCE(found_node == nullptr,
"Found multiple op_desc in graph: %s",
op_desc->Type());
PADDLE_ENFORCE_EQ(
found_node, nullptr,
platform::errors::InvalidArgument(
"Found multiple op_desc in graph: %s.", op_desc->Type()));
found_node = node;
}
}
PADDLE_ENFORCE_NOT_NULL(found_node, "Cannot find op_desc in graph: %s",
op_desc->Type());
PADDLE_ENFORCE_NOT_NULL(
found_node,
platform::errors::NotFound("Cannot find op_desc in graph: %s.",
op_desc->Type()));
for (auto *pending_op : pending_ops[found_node]) {
if (--op_deps.at(pending_op) == 0) {
ready_ops.insert(pending_op);
......