// Patch overview (unified diff, five files under
// paddle/fluid/operators/distributed_ops/). Text placed before the first
// "diff --git" header is commentary only and is not part of any hunk.
//
//  * recv_op.cc, send_barrier_op.cc, send_op.cc:
//      PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, ...) keeps its condition and its
//      message text; the bare string is wrapped in
//      platform::errors::ExecutionTimeout(...).
//  * ref_by_trainer_id_op.cc (RefByTrainerIdOp::InferShape):
//      the three PADDLE_ENFORCE(...) presence checks on X / TrainerId / Out
//      become PADDLE_ENFORCE_EQ(..., true, ...), and all four checks now pass
//      their message through platform::errors::InvalidArgument(...).
//  * ref_by_trainer_id_op.h (RefByTrainerIdKernel):
//      the previously message-less PADDLE_ENFORCE_LT((size_t)trainer_id,
//      in_list.size()) gains an InvalidArgument message that formats
//      trainer_id and in_list.size(), in that order.
//
// NOTE(review): the enforced condition in ref_by_trainer_id_op.h is strict
//   (trainer_id < in_list.size()), while the new message reads "must >=";
//   confirm the wording and the order of the two "[%s]" arguments.
// NOTE(review): the Wait() failure is now typed ExecutionTimeout but the
//   message still says "internal error in RPCClient"; confirm that a zero
//   return from Wait() can only mean a timeout -- TODO confirm.
// NOTE(review): InferShape checks GetInputDim("TrainerId").size() == 1 and
//   reports "TrainerId should be a scalar."; behaviour is unchanged by this
//   patch, presumably a one-dimensional tensor is what is meant -- verify.
// NOTE(review): the unchanged VLOG context lines stream `"from "` directly
//   after the variable name, with no separating space.
// NOTE(review): the hunk headers below declare multi-line hunks (e.g. -84,7
//   +84,9), but the hunk text here has its line breaks collapsed; the original
//   line structure has to be restored before the patch can be applied.
diff --git a/paddle/fluid/operators/distributed_ops/recv_op.cc b/paddle/fluid/operators/distributed_ops/recv_op.cc index 30353ef35d754eae5bf98209b7c5ad5d25683cb4..aad9aefed4ecc4aa4241ae48f7743ec6ad7ce024 100644 --- a/paddle/fluid/operators/distributed_ops/recv_op.cc +++ b/paddle/fluid/operators/distributed_ops/recv_op.cc @@ -84,7 +84,9 @@ class RecvOp : public framework::OperatorBase { } for (size_t i = 0; i < rets.size(); i++) { VLOG(7) << "before sync_recv " << outs[i] << "from " << epmap[i]; - PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, "internal error in RPCClient"); + PADDLE_ENFORCE_NE( + rets[i]->Wait(), 0U, + platform::errors::ExecutionTimeout("internal error in RPCClient")); VLOG(7) << "after sync_recv " << outs[i] << "from " << epmap[i]; } } diff --git a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc index 6bf70844491fe1b21c7f55ff6189e2628e84a7a4..befdf4e938850cad4180d3d7a66cc67ea1b2e810 100644 --- a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc +++ b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc @@ -27,14 +27,23 @@ class RefByTrainerIdOp : public framework::OperatorWithKernel { : OperatorWithKernel(type, inputs, outputs, attrs) {} void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInputs("X"), - "Input(X) of RefByTrainerIdOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("TrainerId"), - "Input(TrainerId) of RefByTrainerIdOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of RefByTrainerIdOp should not be null."); - PADDLE_ENFORCE_EQ(ctx->GetInputDim("TrainerId").size(), 1, - "TrainerId should be a scalar."); + PADDLE_ENFORCE_EQ(ctx->HasInputs("X"), true, + platform::errors::InvalidArgument( + "Input(X) of RefByTrainerIdOp should not be null.")); + + PADDLE_ENFORCE_EQ( + ctx->HasInput("TrainerId"), true, + platform::errors::InvalidArgument( + "Input(TrainerId) of 
RefByTrainerIdOp should not be null.")); + + PADDLE_ENFORCE_EQ( + ctx->HasOutput("Out"), true, + platform::errors::InvalidArgument( + "Output(Out) of RefByTrainerIdOp should not be null.")); + + PADDLE_ENFORCE_EQ( + ctx->GetInputDim("TrainerId").size(), 1, + platform::errors::InvalidArgument("TrainerId should be a scalar.")); // Out's shape is determined at runtime. } diff --git a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h index 49dde44d6b65eecb1859dd4697c4a6440ceaeadb..d372674555afe4738100e1b63336d2bd7b887ff3 100644 --- a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h +++ b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h @@ -38,7 +38,10 @@ class RefByTrainerIdKernel : public framework::OpKernel { } else { trainer_id = *trainer_id_data; } - PADDLE_ENFORCE_LT((size_t)trainer_id, in_list.size()); + PADDLE_ENFORCE_LT((size_t)trainer_id, in_list.size(), + platform::errors::InvalidArgument( + "X' size must >= TrainerId: [%s], but received [%s]", + trainer_id, in_list.size())); out->mutable_data(context.GetPlace()); framework::TensorCopy(*(in_list[trainer_id]), in_list[trainer_id]->place(), out); diff --git a/paddle/fluid/operators/distributed_ops/send_barrier_op.cc b/paddle/fluid/operators/distributed_ops/send_barrier_op.cc index 69c0726b20c1cab16d1cd84eb54d50653f11f81b..a8e9379d214d8f85fd9847fa79046275fe595a18 100644 --- a/paddle/fluid/operators/distributed_ops/send_barrier_op.cc +++ b/paddle/fluid/operators/distributed_ops/send_barrier_op.cc @@ -59,7 +59,9 @@ class SendBarrierOp : public framework::OperatorBase { } for (size_t i = 0; i < rets.size(); i++) { - PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, "internal error in RPCClient"); + PADDLE_ENFORCE_NE( + rets[i]->Wait(), 0U, + platform::errors::ExecutionTimeout("internal error in RPCClient")); } } }; diff --git a/paddle/fluid/operators/distributed_ops/send_op.cc 
b/paddle/fluid/operators/distributed_ops/send_op.cc index 1e9de78732085c50fd2d263aaf4e48d6df743adf..6d129a2140f45b104a797551159a0623df3fdc33 100644 --- a/paddle/fluid/operators/distributed_ops/send_op.cc +++ b/paddle/fluid/operators/distributed_ops/send_op.cc @@ -83,7 +83,9 @@ class SendOp : public framework::OperatorBase { } for (size_t i = 0; i < rets.size(); i++) { VLOG(7) << "before sync_send " << ins[i] << "from " << epmap[i]; - PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, "internal error in RPCClient"); + PADDLE_ENFORCE_NE( + rets[i]->Wait(), 0U, + platform::errors::ExecutionTimeout("internal error in RPCClient")); VLOG(7) << "after sync_send " << ins[i] << "from " << epmap[i]; } }