diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc index 908018d7550802590a3a02e8ddab361310a2fd2f..e8944040751fc802ca630ea6c0124e4258cc3d30 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op_xpu.cc @@ -33,12 +33,12 @@ template class CSoftmaxWithCrossEntropyOp : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const int ignore_index = ctx.Attr("ignore_index"); + const int64_t ignore_index = ctx.Attr("ignore_index"); PADDLE_ENFORCE_LT(ignore_index, 0, platform::errors::InvalidArgument( "When SoftmaxWithCrossEntropy run on XPU, " - "ignore_index should be <=0, however it's %d", + "ignore_index should be <=0, however it's %ld", ignore_index)); const int rid = ctx.Attr("ring_id"); auto map = distributed::ProcessGroupMapFromGid::getInstance(); @@ -460,12 +460,12 @@ class CSoftmaxWithCrossEntropyGrad : public framework::OpKernel { context.Output(framework::GradVarName("Logits")); const phi::DenseTensor* softmax = context.Input("Softmax"); - const int ignore_index = context.Attr("ignore_index"); + const int64_t ignore_index = context.Attr("ignore_index"); PADDLE_ENFORCE_LT(ignore_index, 0, platform::errors::InvalidArgument( "When SoftmaxWithCrossEntropy run on XPU, " - "ignore_index should be <=0, however it's %d", + "ignore_index should be <=0, however it's %ld", ignore_index)); const int rank = context.Attr("rank"); auto& dev_ctx = context.template device_context(); diff --git a/python/paddle/distributed/fleet/layers/mpu/mp_layers.py b/python/paddle/distributed/fleet/layers/mpu/mp_layers.py index f820acfa8f112054d7d6501f128d7ce5adda245b..55bec32bb1a5cfdfdafe27f5472194520488a089 100644 --- a/python/paddle/distributed/fleet/layers/mpu/mp_layers.py +++ b/python/paddle/distributed/fleet/layers/mpu/mp_layers.py @@ -529,7 +529,7 @@ class ParallelCrossEntropy(paddle.nn.Layer): mp_group(Group): The tensor parallel group. name(str, optional): Normally there is no need for user to set this parameter. For detailed information, please refer to :ref:`api_guide_Name` . - ignore_index (int, optional): Specifies a target value that is ignored and + ignore_index (long int, optional): Specifies a target value that is ignored and does not contribute to the loss. A negative value means that no label value needs to be ignored. Default is -100 .