diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op.cc
index fda971b20e27b68cab6110c323469f0d1c77cb59..7c024e50dd21abea4e29257ebc53f7baa7f25235 100644
--- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cc
+++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cc
@@ -106,24 +106,36 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel {
     auto logits_dims = ctx->GetInputDim("Logits");
     auto labels_dims = ctx->GetInputDim("Label");
+
+    int rank = logits_dims.size();
     PADDLE_ENFORCE_EQ(
-        logits_dims.size(), 2UL,
-        "The input of softmax_with_cross_entropy should be a 2-D tensor.");
-    PADDLE_ENFORCE_EQ(labels_dims.size(), 2UL,
-                      "The labels should be a 2-D tensor.");
+        rank, labels_dims.size(),
+        "Input(Logits) and Input(Label) shall have the same rank.");
+    bool check = ctx->IsRuntime() || (framework::product(logits_dims) > 0 &&
+                                      framework::product(labels_dims) > 0);
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(logits_dims, 0, rank - 1),
+                        framework::slice_ddim(labels_dims, 0, rank - 1),
+                        "Input(Logits) and Input(Label) shall have the same "
+                        "shape except the last dimension.");
+    }
 
     if (ctx->Attrs().Get<bool>("soft_label")) {
-      PADDLE_ENFORCE_EQ(logits_dims[1], labels_dims[1],
-                        "If Attr(soft_label) == true, the 2nd dimension of "
-                        "Input(X) and Input(Label) should be equal.");
+      if (check) {
+        PADDLE_ENFORCE_EQ(logits_dims[rank - 1], labels_dims[rank - 1],
+                          "If Attr(soft_label) == true, the last dimension of "
+                          "Input(Logits) and Input(Label) should be equal.");
+      }
     } else {
-      PADDLE_ENFORCE_EQ(labels_dims[1], 1UL,
-                        "If Attr(soft_label) == false, the 2nd dimension of "
+      PADDLE_ENFORCE_EQ(labels_dims[rank - 1], 1UL,
+                        "If Attr(soft_label) == false, the last dimension of "
                         "Input(Label) should be 1.");
     }
 
     ctx->SetOutputDim("Softmax", logits_dims);
-    ctx->SetOutputDim("Loss", {logits_dims[0], 1});
+    auto loss_dims = logits_dims;
+    loss_dims[rank - 1] = 1;
+    ctx->SetOutputDim("Loss", loss_dims);
 
     ctx->ShareLoD("Logits", /*->*/ "Softmax");
     ctx->ShareLoD("Logits", /*->*/ "Loss");
@@ -152,16 +164,33 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel {
     auto softmax_dims = ctx->GetInputDim("Softmax");
     auto labels_dims = ctx->GetInputDim("Label");
-    PADDLE_ENFORCE_EQ(labels_dims.size(), 2UL,
-                      "The labels should be a 2-D tensor.");
+
+    int rank = softmax_dims.size();
+    PADDLE_ENFORCE_EQ(
+        rank, labels_dims.size(),
+        "Input(Softmax) and Input(Label) shall have the same rank.");
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(softmax_dims) <= 0 ||
+                                framework::product(labels_dims) <= 0)) {
+      check = false;
+    }
+    if (check) {
+      PADDLE_ENFORCE_EQ(
+          framework::slice_ddim(softmax_dims, 0, rank - 1),
+          framework::slice_ddim(labels_dims, 0, rank - 1),
+          "Input(Softmax) and Input(Label) shall have the same shape "
+          "except the last dimension.");
+    }
 
     if (ctx->Attrs().Get<bool>("soft_label")) {
-      PADDLE_ENFORCE_EQ(softmax_dims[1], labels_dims[1],
-                        "When Attr(soft_label) == true, the 2nd dimension of "
-                        "Input(X) and Input(Label) should be equal.");
+      if (check) {
+        PADDLE_ENFORCE_EQ(softmax_dims[rank - 1], labels_dims[rank - 1],
+                          "If Attr(soft_label) == true, the last dimension of "
+                          "Input(Softmax) and Input(Label) should be equal.");
+      }
     } else {
-      PADDLE_ENFORCE_EQ(labels_dims[1], 1UL,
-                        "When Attr(soft_label) == false, the 2nd dimension of "
+      PADDLE_ENFORCE_EQ(labels_dims[rank - 1], 1UL,
+                        "If Attr(soft_label) == false, the last dimension of "
                         "Input(Label) should be 1.");
     }
diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu
index 89aaac4cbe6399af08b3d340896df7a07e1be543..d3b8538124f60cd1c6f5849ee5078b83e4485c9f 100644
--- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu
+++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu
@@ -400,9 +400,15 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> {
     auto soft_label = context.Attr<bool>("soft_label");
     auto ignore_index = context.Attr<int>("ignore_index");
+
+    int rank = logits->dims().size();
     if (soft_label) {
-      int batch_size = logits->dims()[0];
-      int feature_size = logits->dims()[1];
+      int batch_size = 1;
+      for (int i = 0; i < rank - 1; ++i) {
+        batch_size *= logits->dims()[i];
+      }
+
+      int feature_size = logits->dims()[rank - 1];
       auto* logits_data = logits->data<T>();
       auto* labels_data = labels->data<T>();
       SoftmaxWithCrossEntropyFusedKernel(
@@ -410,14 +416,23 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> {
           feature_size, context.cuda_device_context().stream());
     } else {
       if (!context.Attr<bool>("numeric_stable_mode")) {
-        math::SoftmaxCUDNNFunctor<T>()(context.cuda_device_context(), logits,
-                                       softmax);
+        // reshape to 2d
+        Tensor logits_2d = framework::ReshapeToMatrix(*logits, rank - 1);
+        Tensor softmax_2d = framework::ReshapeToMatrix(*softmax, rank - 1);
+        Tensor loss_2d = framework::ReshapeToMatrix(*loss, rank - 1);
+        Tensor labels_2d = framework::ReshapeToMatrix(*labels, rank - 1);
+
+        math::SoftmaxCUDNNFunctor<T>()(context.cuda_device_context(),
+                                       &logits_2d, &softmax_2d);
         math::CrossEntropyFunctor<platform::CUDADeviceContext, T>()(
-            context.cuda_device_context(), loss, softmax, labels, false,
-            ignore_index);
+            context.cuda_device_context(), &loss_2d, &softmax_2d, &labels_2d,
+            false, ignore_index);
       } else {
-        int batch_size = logits->dims()[0];
-        int feature_size = logits->dims()[1];
+        int batch_size = 1;
+        for (int i = 0; i < rank - 1; ++i) {
+          batch_size *= logits->dims()[i];
+        }
+        int feature_size = logits->dims()[rank - 1];
         auto* logits_data = logits->data<T>();
         auto* labels_data = labels->data<int64_t>();
         HardLabelSoftmaxWithCrossEntropy(
@@ -443,8 +458,13 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> {
                              context.device_context(), logit_grad);
     T* logit_grad_data = logit_grad->data<T>();
 
-    const int batch_size = logit_grad->dims()[0];
-    const int class_num = logit_grad->dims()[1];
+    int rank = logit_grad->dims().size();
+    int batch_size = 1;
+    for (int i = 0; i < rank - 1; ++i) {
+      batch_size *= logit_grad->dims()[i];
+    }
+
+    const int class_num = logit_grad->dims()[rank - 1];
     int block = 512;
     auto stream = context.cuda_device_context().stream();
     auto ignore_index = context.Attr<int>("ignore_index");
diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.h b/paddle/fluid/operators/softmax_with_cross_entropy_op.h
index 1042cbdcf5e96f0dd3780793cf1f233dc32c3eec..8cba960c7633af56cbf74d662f77af9bb32994ca 100644
--- a/paddle/fluid/operators/softmax_with_cross_entropy_op.h
+++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.h
@@ -40,15 +40,22 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
     softmax->mutable_data<T>(context.GetPlace());
     loss->mutable_data<T>(context.GetPlace());
 
-    int axis_dim = logits->dims()[logits->dims().size() - 1];
+    // reshape to 2D tensor
+    int rank = logits->dims().size();
+    Tensor logits_2d = framework::ReshapeToMatrix(*logits, rank - 1);
+    Tensor labels_2d = framework::ReshapeToMatrix(*labels, rank - 1);
+    Tensor loss_2d = framework::ReshapeToMatrix(*loss, rank - 1);
+    Tensor softmax_2d = framework::ReshapeToMatrix(*softmax, rank - 1);
+
+    int axis_dim = logits->dims()[rank - 1];
 
     auto& dev_ctx =
         context.template device_context<DeviceContext>();
     math::SoftmaxFunctor<DeviceContext, T>()(
-        dev_ctx, axis_dim, logits, softmax);
+        dev_ctx, axis_dim, &logits_2d, &softmax_2d);
     math::CrossEntropyFunctor<DeviceContext, T>()(
-        dev_ctx, loss, softmax, labels, context.Attr<bool>("soft_label"),
-        context.Attr<int>("ignore_index"));
+        dev_ctx, &loss_2d, &softmax_2d, &labels_2d,
+        context.Attr<bool>("soft_label"), context.Attr<int>("ignore_index"));
   }
 };
@@ -63,13 +70,19 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel<T> {
         context.Output<Tensor>(framework::GradVarName("Logits"));
     logit_grad->ShareDataWith(*context.Input<Tensor>("Softmax"));
-    const int class_num = logit_grad->dims()[1];
-    auto out_grad_mat = EigenMatrix<T>::From(*out_grad);
-    auto logit_grad_mat = EigenMatrix<T>::From(*logit_grad);
+    int rank = logit_grad->dims().size();
+    const int class_num = logit_grad->dims()[rank - 1];
+    // reshape to 2d
+    Tensor logit_grad_2d = framework::ReshapeToMatrix(*logit_grad, rank - 1);
+    Tensor out_grad_2d = framework::ReshapeToMatrix(*out_grad, rank - 1);
+
+    auto out_grad_mat = EigenMatrix<T>::From(out_grad_2d);
+    auto logit_grad_mat = EigenMatrix<T>::From(logit_grad_2d);
     auto& place = *context.template device_context<DeviceContext>()
                        .eigen_device();
     if (context.Attr<bool>("soft_label")) {
-      auto lbl_mat = EigenMatrix<T>::From(*labels);
+      Tensor labels_2d = framework::ReshapeToMatrix(*labels, rank - 1);
+      auto lbl_mat = EigenMatrix<T>::From(labels_2d);
       logit_grad_mat.device(place) =
           out_grad_mat.broadcast(Eigen::DSizes<int, 2>(1, class_num)) *
           (logit_grad_mat - lbl_mat);
@@ -78,7 +91,8 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel<T> {
           logit_grad_mat *
          out_grad_mat.broadcast(Eigen::DSizes<int, 2>(1, class_num));
 
-      const int batch_size = logit_grad->dims()[0];
+      const int batch_size = logit_grad_2d.dims()[0];
+
       const int64_t* label_data = labels->data<int64_t>();
       T* logit_grad_data = logit_grad->data<T>();
       const T* out_grad_data = out_grad->data<T>();
diff --git a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py
index b0494f114c5f7f4449e87ec67b97924fe77cd8c9..b06b52f75d21a720e2473feba6ba2e1dccc2db89 100644
--- a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py
+++ b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py
@@ -195,5 +195,144 @@ class TestSoftmaxWithCrossEntropyOp3NoCudnn(TestSoftmaxWithCrossEntropyOp3):
         self.numeric_stable_mode = True
 
 
+class TestSoftmaxWithCrossEntropyOp5(OpTest):
+    """
+    Test softmax with cross entropy operator with ignore_index on a
+    high-rank (3-D) input.
+    """
+
+    def initParams(self):
+        self.numeric_stable_mode = False
+
+    def setUp(self):
+        self.initParams()
+        self.op_type = "softmax_with_cross_entropy"
+        batch_size = [6, 10]
+        class_num = 47
+
+        logits = np.random.uniform(
+            0.1, 1.0, tuple(batch_size + [class_num])).astype("float64")
+        softmax = np.apply_along_axis(stable_softmax, 2, logits)
+        labels = np.random.randint(
+            0, class_num, tuple(batch_size + [1]), dtype="int64")
+        ignore_index = 7
+
+        softmax_2d = np.reshape(softmax, [-1, class_num])
+        labels_2d = np.reshape(labels, [-1, 1])
+        cross_entropy = np.asmatrix(
+            [[-np.log(softmax_2d[i][labels_2d[i][0]])]
+             if labels_2d[i] != ignore_index else [0]
+             for i in range(softmax_2d.shape[0])],
+            dtype="float64")
+
+        cross_entropy = np.reshape(cross_entropy, batch_size)
+
+        output_shape = tuple(batch_size + [1])
+        output_res = cross_entropy.astype("float64")
+        output_res = np.expand_dims(output_res, axis=2)
+        self.inputs = {"Logits": logits, "Label": labels}
+        self.outputs = {
+            "Softmax": softmax.astype("float64"),
+            "Loss": output_res,
+        }
+        self.attrs = {
+            "ignore_index": ignore_index,
+            "numeric_stable_mode": self.numeric_stable_mode
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(["Logits"], "Loss")
+
+
+class TestSoftmaxWithCrossEntropyOp5NoCudnn(TestSoftmaxWithCrossEntropyOp5):
+    def initParams(self):
+        self.numeric_stable_mode = True
+
+
+class TestSoftmaxWithCrossEntropyOp6(OpTest):
+    """
+    Test softmax with cross entropy operator with soft labels on a
+    high-rank (3-D) input.
+    """
+
+    def setUp(self):
+        self.op_type = "softmax_with_cross_entropy"
+        batch_size = [6, 10]
+        class_num = 37
+
+        logits = np.random.uniform(
+            0.1, 1.0, tuple(batch_size + [class_num])).astype("float64")
+        softmax = np.apply_along_axis(stable_softmax, 2, logits)
+        labels = np.random.uniform(
+            0.1, 1.0, tuple(batch_size + [class_num])).astype("float64")
+        labels /= np.sum(labels, axis=2, keepdims=True)
+
+        cross_entropy = (-labels * np.log(softmax)).sum(
+            axis=2, keepdims=True).astype("float64")
+
+        self.inputs = {"Logits": logits, "Label": labels}
+        self.outputs = {
+            "Softmax": softmax.astype("float64"),
+            "Loss": cross_entropy.astype("float64")
+        }
+        self.attrs = {"soft_label": True}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(["Logits"], "Loss")
+
+
+class TestSoftmaxWithCrossEntropyOpFp16_2(TestSoftmaxWithCrossEntropyOp):
+    def initParams(self):
+        self.numeric_stable_mode = False
+        self.dtype = np.float16
+
+    def setUp(self):
+        self.initParams()
+        self.op_type = "softmax_with_cross_entropy"
+        batch_size = [64, 10]
+        class_num = 37
+
+        # NOTE: numpy float16 has very low accuracy, use float32 for the numpy check.
+        logits = np.random.uniform(
+            0.1, 1.0, tuple(batch_size + [class_num])).astype(np.float32)
+        softmax = np.apply_along_axis(stable_softmax, 2, logits)
+        labels = np.random.randint(
+            0, class_num, tuple(batch_size + [1]), dtype="int64")
+
+        softmax_2d = np.reshape(softmax, [-1, class_num])
+        labels_2d = np.reshape(labels, [-1, 1])
+
+        cross_entropy = np.asmatrix(
+            [[-np.log(softmax_2d[i][labels_2d[i][0]])]
+             for i in range(softmax_2d.shape[0])],
+            dtype=np.float32)
+
+        cross_entropy = np.reshape(cross_entropy, batch_size)
+        output_shape = tuple(batch_size + [1])
+        output_res = cross_entropy.astype(self.dtype)
+        output_res = np.expand_dims(output_res, axis=2)
+
+        self.inputs = {
+            "Logits": logits.astype(self.dtype).view(np.uint16),
+            "Label": labels
+        }
+        self.outputs = {
+            "Softmax": softmax.astype(self.dtype),
+            "Loss": output_res,
+        }
+        self.attrs = {"numeric_stable_mode": self.numeric_stable_mode}
+
+    def test_check_output(self):
+        self.check_output(atol=1e-2)
+
+    def test_check_grad(self):
+        self.check_grad(["Logits"], "Loss", max_relative_error=0.1)
+
+
 if __name__ == "__main__":
     unittest.main()
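
Note (not part of the patch): below is a minimal NumPy sketch, under the same assumptions the new tests make (3-D logits, hard labels with a trailing dimension of 1), of the reference computation the high-rank cases rely on: collapse every leading dimension into one batch dimension, take the per-row cross entropy, then restore the original shape. The helper name reference_loss and the shapes are illustrative only.

import numpy as np


def stable_softmax(x):
    # same idea as the helper the tests import: shift by the max for stability
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    return exps / np.sum(exps)


def reference_loss(logits, labels, ignore_index=-100):
    # flatten all leading dimensions into one batch dimension, mirroring
    # framework::ReshapeToMatrix(*tensor, rank - 1) in the kernels
    class_num = logits.shape[-1]
    softmax = np.apply_along_axis(stable_softmax, -1, logits)
    softmax_2d = softmax.reshape(-1, class_num)
    labels_1d = labels.reshape(-1)
    loss_1d = np.array([
        0.0 if lbl == ignore_index else -np.log(softmax_2d[i][lbl])
        for i, lbl in enumerate(labels_1d)
    ])
    # restore the leading dimensions; the last dimension of Loss stays 1
    return loss_1d.reshape(labels.shape)


logits = np.random.uniform(0.1, 1.0, (6, 10, 47)).astype("float64")
labels = np.random.randint(0, 47, (6, 10, 1), dtype="int64")
print(reference_loss(logits, labels, ignore_index=7).shape)  # (6, 10, 1)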