diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc
index 80f7b69c142eb02c64076c8724a16d09e31c72d0..2e16201e74c153888594ebe6679fb0036734dad4 100644
--- a/paddle/operators/cross_entropy_op.cc
+++ b/paddle/operators/cross_entropy_op.cc
@@ -37,13 +37,13 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(x->dims()[0], label->dims()[0],
                       "The 1st dimension of Input(X) and Input(Label) should "
                       "be equal.");
-    if (ctx.Attr<bool>("soft_label")) {
+    if (ctx.Attr<bool>("softLabel")) {
       PADDLE_ENFORCE_EQ(x->dims()[1], label->dims()[1],
-                        "If Attr(soft_label) == true, the 2nd dimension of "
+                        "If Attr(softLabel) == true, the 2nd dimension of "
                         "Input(X) and Input(Label) should be equal.");
     } else {
       PADDLE_ENFORCE_EQ(label->dims()[1], 1,
-                        "If Attr(soft_label) == false, the 2nd dimension of "
+                        "If Attr(softLabel) == false, the 2nd dimension of "
                         "Input(Label) should be 1.");
     }
 
@@ -63,6 +63,8 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
                             "Input(Label) should be not null.");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")),
                             "Input(Y@GRAD) shoudl be not null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(framework::GradVarName("X")),
+                            "Output(X@GRAD) should be not null.");
 
     auto x = ctx.Input<Tensor>("X");
     auto label = ctx.Input<Tensor>("Label");
@@ -80,13 +82,13 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
                       "be equal.");
     PADDLE_ENFORCE_EQ(dy->dims()[1], 1,
                       "The 2nd dimension of Input(Y@Grad) should be 1.");
-    if (ctx.Attr<bool>("soft_label")) {
+    if (ctx.Attr<bool>("softLabel")) {
       PADDLE_ENFORCE_EQ(x->dims()[1], label->dims()[1],
-                        "When Attr(soft_label) == true, the 2nd dimension of "
+                        "When Attr(softLabel) == true, the 2nd dimension of "
                         "Input(X) and Input(Label) should be equal.");
     } else {
       PADDLE_ENFORCE_EQ(label->dims()[1], 1,
-                        "When Attr(soft_label) == false, the 2nd dimension of "
+                        "When Attr(softLabel) == false, the 2nd dimension of "
                         "Input(Label) should be 1.");
     }
 
@@ -105,18 +107,19 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
              "where N is the batch size and D is the number of classes. "
              "This input is a probability computed by the previous operator, "
              "which is almost always the result of a softmax operator.");
-    AddInput("Label",
-             "(Tensor, default Tensor<int>), the ground truth which is "
-             "a 1-D or 2-D tensor. "
-             "When soft_label is set to 0, `Label` is a Tensor<int> with shape "
-             "[N x 1]. "
-             "When soft_label is set to 1, `Label` is a Tensor<float/double> "
-             "with shape [N x K].");
+    AddInput(
+        "Label",
+        "(Tensor, default Tensor<int>), the ground truth which is "
+        "a 2-D tensor. "
+        "When softLabel is set to false, `Label` is a Tensor<int> with shape "
+        "[N x 1]. "
+        "When softLabel is set to true, `Label` is a Tensor<float/double> "
+        "with shape [N x K].");
     AddOutput("Y",
-              "(Tensor, default Tensor<float>), a 1-D tensor "
+              "(Tensor, default Tensor<float>), a 2-D tensor "
               "with shape [N x 1]. The cross entropy loss.");
     AddAttr<bool>(
-        "soft_label",
+        "softLabel",
         "(bool, default false), a flag to indicate whether to interpretate "
         "the given labels as soft labels.")
         .SetDefault(false);
@@ -126,12 +129,12 @@ CrossEntropy Operator.
 It supports both standard cross-entropy and soft-label cross-entropy loss
 computation.
 1) One-hot cross-entropy:
-    soft_label = False, Label[i, 0] indicates the class index for sample i:
+    softLabel = false, Label[i, 0] indicates the class index for sample i:
 
                 Y[i] = -log(X[i, Label[i]])
 
 2) Soft-label cross-entropy:
-    soft_label = True, Label[i, j] indicates the soft label of class j
+    softLabel = true, Label[i, j] indicates the soft label of class j
     for sample i:
 
                 Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu
index 283021eae53bc49dc121f6ee2a1125a4cff6266e..18e44d77c9f62b296dc57952e546f844670c7d57 100644
--- a/paddle/operators/cross_entropy_op.cu
+++ b/paddle/operators/cross_entropy_op.cu
@@ -70,7 +70,7 @@ __global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label,
 
 // TODO(qingqing): make zero setting a common function.
 template <typename T>
-__global__ void zero(T* X, const int N) {
+__global__ void Zero(T* X, const int N) {
   for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
        i += blockDim.x * gridDim.x) {
     X[i] = 0.0;
@@ -108,18 +108,17 @@ class CrossEntropyOpCUDAKernel : public framework::OpKernel {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                    "This kernel only runs on GPU device.");
 
-    auto x = ctx.Input<Tensor>("X");
-    auto y = ctx.Output<Tensor>("Y");
-    auto label = ctx.Input<Tensor>("Label");
+    const Tensor* x = ctx.Input<Tensor>("X");
+    const Tensor* label = ctx.Input<Tensor>("Label");
+    Tensor* y = ctx.Output<Tensor>("Y");
 
-    auto* x_data = x->data<T>();
-    y->mutable_data<T>(ctx.GetPlace());
-    auto* y_data = y->data<T>();
+    const T* x_data = x->data<T>();
+    T* y_data = y->mutable_data<T>(ctx.GetPlace());
 
     int batch_size = x->dims()[0];
     int class_num = x->dims()[1];
-    if (ctx.Attr<bool>("soft_label")) {
+    if (ctx.Attr<bool>("softLabel")) {
       auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
       int block = class_num > 512 ?
                       512 : pow(2, int(std::log2(class_num)));
@@ -148,38 +147,41 @@ class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                    "This kernel only runs on GPU device.");
 
-    auto x = ctx.Input<Tensor>("X");
-    auto dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
-    auto label = ctx.Input<Tensor>("Label");
+    const Tensor* x = ctx.Input<Tensor>("X");
+    const Tensor* label = ctx.Input<Tensor>("Label");
+    Tensor* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
 
-    auto* dx_data = dx->mutable_data<T>(ctx.GetPlace());
-    auto* dy_data = dy->data<T>();
-    auto* x_data = x->data<T>();
+    const T* dy_data =
+        ctx.Input<Tensor>(framework::GradVarName("Y"))->data<T>();
+    T* dx_data = dx->mutable_data<T>(ctx.GetPlace());
+    const T* x_data = x->data<T>();
 
-    int n = x->dims()[0];
-    int d = x->dims()[1];
+    int batch_size = x->dims()[0];
+    int class_num = x->dims()[1];
 
     int block = 512;
-    int grid = (n * d + block - 1) / block;
-    zero<T><<<grid, block, 0,
-              reinterpret_cast<const platform::CUDADeviceContext&>(
-                  ctx.device_context())
-                  .stream()>>>(dx_data, n * d);
-    if (ctx.Attr<bool>("soft_label")) {
+    int grid = (batch_size * class_num + block - 1) / block;
+
+    if (ctx.Attr<bool>("softLabel")) {
       auto* label_data = label->data<T>();
       SoftCrossEntropyGradientKernel<T><<<
           grid, block, 0, reinterpret_cast<const platform::CUDADeviceContext&>(
                               ctx.device_context())
                               .stream()>>>(dx_data, dy_data, x_data, label_data,
-                                           n, d);
+                                           batch_size, class_num);
     } else {
+      Zero<T><<<grid, block, 0,
+                reinterpret_cast<const platform::CUDADeviceContext&>(
+                    ctx.device_context())
+                    .stream()>>>(dx_data, batch_size * class_num);
+      auto* label_data = label->data<int>();
+      grid = (batch_size + block - 1) / block;
       CrossEntropyGradientKernel<T><<<
           grid, block, 0, reinterpret_cast<const platform::CUDADeviceContext&>(
                               ctx.device_context())
                               .stream()>>>(dx_data, dy_data, x_data, label_data,
-                                           n, d);
+                                           batch_size, class_num);
     }
   }
 };
diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h
index 6b3f8c95bee58a3bf7d5f40ba84656beee6cdc97..255b2e9f5ea7566cca7fd3914e38da804b7c7006 100644
--- a/paddle/operators/cross_entropy_op.h
+++ b/paddle/operators/cross_entropy_op.h
@@ -42,14 +42,14 @@ class CrossEntropyOpKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
-                   "It must use CPUPlace.");
+                   "This kernel only runs on CPU.");
     const Tensor* x = ctx.Input<Tensor>("X");
     const Tensor* labels = ctx.Input<Tensor>("Label");
     Tensor* y = ctx.Output<Tensor>("Y");
-    y->mutable_data<T>(ctx.GetPlace());
+    T* y_data = y->mutable_data<T>(ctx.GetPlace());
 
     const int batch_size = x->dims()[0];
-    if (ctx.Attr<bool>("soft_label")) {
+    if (ctx.Attr<bool>("softLabel")) {
       auto prob = EigenMatrix<T>::From(*x);
       auto lbl_mat = EigenMatrix<T>::From(*labels);
       auto loss = EigenMatrix<T>::From(*y);
@@ -60,9 +60,7 @@ class CrossEntropyOpKernel : public framework::OpKernel {
                    .reshape(Eigen::DSizes<int, 2>(batch_size, 1)));
     } else {
       const int class_num = x->dims()[1];
 
-      const T* x_data = x->data<T>();
-      T* y_data = y->data<T>();
       const int* label_data = labels->data<int>();
 
       for (int i = 0; i < batch_size; ++i) {
@@ -78,33 +76,32 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
-                   "It must use CPUPlace.");
-
-    auto x = ctx.Input<Tensor>("X");
-    auto dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
-    auto label = ctx.Input<Tensor>("Label");
-
-    auto* dx_data = dx->mutable_data<T>(ctx.GetPlace());
-    auto* dy_data = dy->data<T>();
-    auto* x_data = x->data<T>();
+                   "This kernel only runs on CPU.");
+    const Tensor* x = ctx.Input<Tensor>("X");
+    const Tensor* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
+    const Tensor* label = ctx.Input<Tensor>("Label");
+    Tensor* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+    T* dx_data = dx->mutable_data<T>(ctx.GetPlace());
 
-    int batch_size = x->dims()[0];
     int class_num = x->dims()[1];
-
-    // TODO(qingqing): make zero setting an common function.
-    if (ctx.Attr<bool>("soft_label")) {
-      auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
-      int index = 0;
-      for (int i = 0; i < batch_size; ++i) {
-        for (int j = 0; j < class_num; ++j) {
-          dx_data[index] = -label_data[index] * dy_data[i] / x_data[index];
-          index++;
-        }
-      }
+    if (ctx.Attr<bool>("softLabel")) {
+      auto x_mat = EigenMatrix<T>::From(*x);
+      auto dy_mat = EigenMatrix<T>::From(*dy);
+      auto lbl_mat = EigenMatrix<T>::From(*label);
+      auto dx_mat = EigenMatrix<T>::From(*dx);
+
+      dx_mat.device(ctx.GetEigenDevice<platform::CPUPlace>()) =
+          -(lbl_mat * dy_mat.broadcast(Eigen::DSizes<int, 2>(1, class_num)) /
+            x_mat);
     } else {
-      auto* label_data = label->data<int>();
+      int batch_size = x->dims()[0];
+      const T* dy_data = dy->data<T>();
+      const T* x_data = x->data<T>();
+      const int* label_data = label->data<int>();
+
+      // TODO(qingqing): make zero setting a common function.
       memset(dx_data, 0, sizeof(T) * batch_size * class_num);
+
       for (int i = 0; i < batch_size; ++i) {
         PADDLE_ASSERT(label_data[i] >= 0 || label_data[i] < class_num);
         int index = i * class_num + label_data[i];
diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
index 52032f3a698005393b6766a941697267daa2bd55..1de514dff487158e0823fd628d9b3b50f36fdd9b 100644
--- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py
+++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
@@ -21,7 +21,7 @@ class TestCrossEntropyOp1(OpTest):
 
         self.inputs = {"X": X, "Label": label}
         self.outputs = {"Y": cross_entropy}
-        self.attrs = {"soft_label": False}
+        self.attrs = {"softLabel": False}
 
     def test_check_output(self):
         self.check_output()
@@ -49,7 +49,7 @@ class TestCrossEntropyOp2(OpTest):
 
         self.inputs = {"X": X, "Label": label}
         self.outputs = {"Y": cross_entropy}
-        self.attrs = {"soft_label": True}
+        self.attrs = {"softLabel": True}
 
     def test_check_output(self):
         self.check_output()
@@ -73,6 +73,7 @@ class TestCrossEntropyOp3(OpTest):
             0, class_num, (batch_size), dtype="int32")
         label = np.zeros(X.shape)
         label[np.arange(batch_size), label_index] = 1
+
        cross_entropy = np.asmatrix(
             [[-np.log(X[i][label_index[i]])] for i in range(X.shape[0])],
             dtype="float32")
@@ -81,7 +82,7 @@ class TestCrossEntropyOp3(OpTest):
 
         self.inputs = {"X": X, "Label": label}
         self.outputs = {"Y": cross_entropy}
-        self.attrs = {"soft_label": True}
+        self.attrs = {"softLabel": True}
 
     def test_check_output(self):
         self.check_output()
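As a quick reference for reviewers, the sketch below mirrors in NumPy the two loss formulas documented in the operator comment above: hard labels when softLabel is false, a per-row distribution when it is true. It is an illustrative sketch only; the function name cross_entropy_reference and the shapes chosen here are assumptions for the example and are not part of the patch or the operator's API.

# Hypothetical NumPy reference -- not taken from the patch above.
import numpy as np


def cross_entropy_reference(X, label, soft_label=False):
    """Return Y with shape [N x 1] given probabilities X with shape [N x D].

    soft_label=False: label is [N x 1] with class indices,
                      Y[i] = -log(X[i, label[i]]).
    soft_label=True:  label is [N x D] with a per-row distribution,
                      Y[i] = -sum_j label[i, j] * log(X[i, j]).
    """
    if soft_label:
        return (-label * np.log(X)).sum(axis=1, keepdims=True)
    # Hard labels: pick the predicted probability of the ground-truth class.
    rows = np.arange(X.shape[0])
    return -np.log(X[rows, label.flatten()]).reshape(-1, 1)


if __name__ == "__main__":
    batch_size, class_num = 4, 5
    X = np.random.uniform(0.1, 1.0, (batch_size, class_num)).astype("float32")
    X /= X.sum(axis=1, keepdims=True)  # make each row a probability vector

    hard = np.random.randint(0, class_num, (batch_size, 1))
    print(cross_entropy_reference(X, hard, soft_label=False))

    soft = np.random.uniform(0.1, 1.0, (batch_size, class_num))
    soft /= soft.sum(axis=1, keepdims=True)
    print(cross_entropy_reference(X, soft, soft_label=True))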