From a97d5a6153ac972435e6f2e6915c180400742fe7 Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Wed, 13 May 2020 11:35:07 +0800 Subject: [PATCH] fix op error, test=develop (#24451) --- .../operators/distributed_ops/recv_op.cc | 4 ++- .../distributed_ops/ref_by_trainer_id_op.cc | 25 +++++++++++++------ .../distributed_ops/ref_by_trainer_id_op.h | 5 +++- .../distributed_ops/send_barrier_op.cc | 4 ++- .../operators/distributed_ops/send_op.cc | 4 ++- 5 files changed, 30 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/operators/distributed_ops/recv_op.cc b/paddle/fluid/operators/distributed_ops/recv_op.cc index 30353ef35d..aad9aefed4 100644 --- a/paddle/fluid/operators/distributed_ops/recv_op.cc +++ b/paddle/fluid/operators/distributed_ops/recv_op.cc @@ -84,7 +84,9 @@ class RecvOp : public framework::OperatorBase { } for (size_t i = 0; i < rets.size(); i++) { VLOG(7) << "before sync_recv " << outs[i] << "from " << epmap[i]; - PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, "internal error in RPCClient"); + PADDLE_ENFORCE_NE( + rets[i]->Wait(), 0U, + platform::errors::ExecutionTimeout("internal error in RPCClient")); VLOG(7) << "after sync_recv " << outs[i] << "from " << epmap[i]; } } diff --git a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc index 6bf7084449..befdf4e938 100644 --- a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc +++ b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc @@ -27,14 +27,23 @@ class RefByTrainerIdOp : public framework::OperatorWithKernel { : OperatorWithKernel(type, inputs, outputs, attrs) {} void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInputs("X"), - "Input(X) of RefByTrainerIdOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("TrainerId"), - "Input(TrainerId) of RefByTrainerIdOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of RefByTrainerIdOp should not be null."); - PADDLE_ENFORCE_EQ(ctx->GetInputDim("TrainerId").size(), 1, - "TrainerId should be a scalar."); + PADDLE_ENFORCE_EQ(ctx->HasInputs("X"), true, + platform::errors::InvalidArgument( + "Input(X) of RefByTrainerIdOp should not be null.")); + + PADDLE_ENFORCE_EQ( + ctx->HasInput("TrainerId"), true, + platform::errors::InvalidArgument( + "Input(TrainerId) of RefByTrainerIdOp should not be null.")); + + PADDLE_ENFORCE_EQ( + ctx->HasOutput("Out"), true, + platform::errors::InvalidArgument( + "Output(Out) of RefByTrainerIdOp should not be null.")); + + PADDLE_ENFORCE_EQ( + ctx->GetInputDim("TrainerId").size(), 1, + platform::errors::InvalidArgument("TrainerId should be a scalar.")); // Out's shape is determined at runtime. } diff --git a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h index b9a5796bef..43dd9c3c98 100644 --- a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h +++ b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h @@ -38,7 +38,10 @@ class RefByTrainerIdKernel : public framework::OpKernel { } else { trainer_id = *trainer_id_data; } - PADDLE_ENFORCE_LT((size_t)trainer_id, in_list.size()); + PADDLE_ENFORCE_LT((size_t)trainer_id, in_list.size(), + platform::errors::InvalidArgument( + "X' size must >= TrainerId: [%s], but received [%s]", + trainer_id, in_list.size())); out->mutable_data(context.GetPlace()); framework::TensorCopy(*(in_list[trainer_id]), in_list[trainer_id]->place(), out); diff --git a/paddle/fluid/operators/distributed_ops/send_barrier_op.cc b/paddle/fluid/operators/distributed_ops/send_barrier_op.cc index 69c0726b20..a8e9379d21 100644 --- a/paddle/fluid/operators/distributed_ops/send_barrier_op.cc +++ b/paddle/fluid/operators/distributed_ops/send_barrier_op.cc @@ -59,7 +59,9 @@ class SendBarrierOp : public framework::OperatorBase { } for (size_t i = 0; i < rets.size(); i++) { - PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, "internal error in RPCClient"); + PADDLE_ENFORCE_NE( + rets[i]->Wait(), 0U, + platform::errors::ExecutionTimeout("internal error in RPCClient")); } } }; diff --git a/paddle/fluid/operators/distributed_ops/send_op.cc b/paddle/fluid/operators/distributed_ops/send_op.cc index 1e9de78732..6d129a2140 100644 --- a/paddle/fluid/operators/distributed_ops/send_op.cc +++ b/paddle/fluid/operators/distributed_ops/send_op.cc @@ -83,7 +83,9 @@ class SendOp : public framework::OperatorBase { } for (size_t i = 0; i < rets.size(); i++) { VLOG(7) << "before sync_send " << ins[i] << "from " << epmap[i]; - PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, "internal error in RPCClient"); + PADDLE_ENFORCE_NE( + rets[i]->Wait(), 0U, + platform::errors::ExecutionTimeout("internal error in RPCClient")); VLOG(7) << "after sync_send " << ins[i] << "from " << epmap[i]; } } -- GitLab