From 6f4bf5052f12f5efd4eb58d6c704c96801691a10 Mon Sep 17 00:00:00 2001
From: caoying03
Date: Mon, 6 Nov 2017 13:16:01 +0800
Subject: [PATCH] fix softmax with cross entropy op.

---
 paddle/operators/cross_entropy_op.cc          | 24 ++++++-------
 .../softmax_with_cross_entropy_op.cc          | 30 ++++++++--------
 .../softmax_with_cross_entropy_op.cu          | 24 +++++++------
 .../operators/softmax_with_cross_entropy_op.h | 36 +++++++++----------
 .../test_softmax_with_cross_entropy_op.py     | 27 +++++++-------
 5 files changed, 69 insertions(+), 72 deletions(-)

diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc
index 24df1fcadac..9d41879b27a 100644
--- a/paddle/operators/cross_entropy_op.cc
+++ b/paddle/operators/cross_entropy_op.cc
@@ -114,21 +114,17 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
           "where N is the batch size and D is the number of classes. "
           "This input is a probability computed by the previous operator, "
           "which is almost always the result of a softmax operator.");
-    AddInput(
-        "Label",
-        "(Tensor, default Tensor<int>), the ground truth which is "
-        "a 2-D tensor. "
-        "When soft_label is set to false, Label is a Tensor<int> with shape "
-        "[N x 1]. "
-        "When soft_label is set to true, Label is a Tensor<float/double> "
-        "with shape [N x K].");
+    AddInput("Label",
+             "(Tensor), the ground truth which is a 2-D tensor. When "
+             "soft_label is set to false, Label is a Tensor<int64> with shape "
+             "[N x 1]. When soft_label is set to true, Label is a "
+             "Tensor<float/double> with shape [N x K].");
     AddOutput("Y",
-              "(Tensor, default Tensor<float>), a 2-D tensor "
-              "with shape [N x 1]. The cross entropy loss.");
-    AddAttr<bool>(
-        "soft_label",
-        "(bool, default false), a flag to indicate whether to interpretate "
-        "the given labels as soft labels.")
+              "(Tensor, default Tensor<float>), a 2-D tensor with shape "
+              "[N x 1]. The cross entropy loss.");
+    AddAttr<bool>("soft_label",
+                  "(bool, default false), a flag indicating whether to "
+                  "interpret the given labels as soft labels.")
         .SetDefault(false);
     AddComment(R"DOC(
 CrossEntropy Operator.
diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc
index a006e0a5950..c6b94f5cc94 100644
--- a/paddle/operators/softmax_with_cross_entropy_op.cc
+++ b/paddle/operators/softmax_with_cross_entropy_op.cc
@@ -4,13 +4,13 @@
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+http://www.apache.org/licenses/LICENSE-2.0
 
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
 
 #include "paddle/operators/softmax_with_cross_entropy_op.h"
 #include
@@ -30,12 +30,10 @@ class SoftmaxWithCrossEntropyOpMaker
           "which is a 2-D tensor with shape [N x K]. N is the batch_size, "
           "and K is the class number.");
     AddInput("Label",
-             "(Tensor, default: Tensor<int>), The ground truth which is a 2-D "
-             "tensor. "
-             "If softLabel is set to false, Label is a Tensor<int> with shape "
-             "[N x 1]."
-             "If softLabel is set to true, Label is a Tensor<float/double> "
-             "with shape [N x K].");
+             "(Tensor) The ground truth which is a 2-D tensor. If soft_label "
+             "is set to false, Label is a Tensor<int64> with shape [N x 1]. If "
+             "soft_label is set to true, Label is a Tensor<float/double> with "
+             "shape [N x K].");
     AddOutput(
         "Softmax",
         "(Tensor, default: Tensor<float>), A 2-D tensor with shape [N x K]. "
@@ -62,7 +60,7 @@
 Because this operator performs a softmax on logits internally, it expects
 unscaled logits. This operator should not be used with the output of
 softmax operator since that would produce incorrect results.
 
-When the attribute softLabel is set false, this operators expects mutually
+When the attribute soft_label is set to false, this operator expects mutually
 exclusive hard labels, each sample in a batch is in exactly one class with a
 probability of 1.0. Each sample in the batch will have a single label.
@@ -198,6 +196,8 @@ REGISTER_OPERATOR(softmax_with_cross_entropy, ops::SoftmaxWithCrossEntropyOp,
 REGISTER_OPERATOR(softmax_with_cross_entropy_grad,
                   ops::SoftmaxWithCrossEntropyOpGrad);
 REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy,
-                       ops::SoftmaxWithCrossEntropyKernel<float>);
+                       ops::SoftmaxWithCrossEntropyKernel<float>,
+                       ops::SoftmaxWithCrossEntropyKernel<double>);
 REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy_grad,
-                       ops::SoftmaxWithCrossEntropyGradKernel<float>);
+                       ops::SoftmaxWithCrossEntropyGradKernel<float>,
+                       ops::SoftmaxWithCrossEntropyGradKernel<double>);
diff --git a/paddle/operators/softmax_with_cross_entropy_op.cu b/paddle/operators/softmax_with_cross_entropy_op.cu
index 7602918bb39..b1faddac3fd 100644
--- a/paddle/operators/softmax_with_cross_entropy_op.cu
+++ b/paddle/operators/softmax_with_cross_entropy_op.cu
@@ -4,13 +4,13 @@
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+http://www.apache.org/licenses/LICENSE-2.0
 
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
 
 #define EIGEN_USE_GPU
 
@@ -24,7 +24,7 @@ using Tensor = framework::Tensor;
 namespace {
 template <typename T>
 __global__ void CrossEntropyGrad(T* logit_grad, const T* loss_grad,
-                                 const int* labels, const int batch_size,
+                                 const int64_t* labels, const int batch_size,
                                  const int class_num) {
   int tid = blockIdx.x * blockDim.x + threadIdx.x;
   int sample_idx = tid / class_num;
@@ -50,7 +50,7 @@ __global__ void SoftCrossEntropyGradientKernel(T* logit_grad,
   int ids = blockIdx.x * blockDim.x + threadIdx.x;
   if (ids < batch_size * class_num) {
     int row_ids = ids / class_num;
-    logit_grad[ids] = logit_grad[ids] * (loss_grad[row_ids] - labels[ids]);
+    logit_grad[ids] = loss_grad[row_ids] * (logit_grad[ids] - labels[ids]);
   }
 }
 }  // namespace
@@ -104,7 +104,7 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> {
                      .stream()>>>(logit_grad_data, loss_grad_data,
                                   label_data, batch_size, class_num);
     } else {
-      const int* label_data = labels->data<int>();
+      const int64_t* label_data = labels->data<int64_t>();
       CrossEntropyGrad<T><<<
           grid, block, 0, reinterpret_cast<const platform::CUDADeviceContext&>(
                               context.device_context())
@@ -119,6 +119,8 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> {
 
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy,
-                       ops::SoftmaxWithCrossEntropyCUDAKernel<float>);
+                       ops::SoftmaxWithCrossEntropyCUDAKernel<float>,
+                       ops::SoftmaxWithCrossEntropyCUDAKernel<double>);
 REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy_grad,
-                       ops::SoftmaxWithCrossEntropyGradCUDAKernel<float>);
+                       ops::SoftmaxWithCrossEntropyGradCUDAKernel<float>,
+                       ops::SoftmaxWithCrossEntropyGradCUDAKernel<double>);
diff --git a/paddle/operators/softmax_with_cross_entropy_op.h b/paddle/operators/softmax_with_cross_entropy_op.h
index 7f3f9e23aa9..c4ab3f74b4b 100644
--- a/paddle/operators/softmax_with_cross_entropy_op.h
+++ b/paddle/operators/softmax_with_cross_entropy_op.h
@@ -4,13 +4,13 @@
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+http://www.apache.org/licenses/LICENSE-2.0
 
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License. */
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
 
 #pragma once
 #include "paddle/framework/eigen.h"
@@ -60,25 +60,25 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel<T> {
     logit_grad->ShareDataWith(*context.Input<Tensor>("Softmax"));
     const int class_num = logit_grad->dims()[1];
 
+    auto out_grad_mat = EigenMatrix<T>::From(*out_grad);
+    auto logit_grad_mat = EigenMatrix<T>::From(*logit_grad);
+
     if (context.Attr<bool>("soft_label")) {
-      auto out_grad_mat = EigenMatrix<T>::From(*out_grad);
-      auto logit_grad_mat = EigenMatrix<T>::From(*logit_grad);
       auto lbl_mat = EigenMatrix<T>::From(*labels);
       logit_grad_mat.device(context.GetEigenDevice<platform::CPUPlace>()) =
-          logit_grad_mat *
-          (out_grad_mat.broadcast(Eigen::DSizes<int, 2>(1, class_num)) -
-           lbl_mat);
+          out_grad_mat.broadcast(Eigen::DSizes<int, 2>(1, class_num)) *
+          (logit_grad_mat - lbl_mat);
     } else {
+      logit_grad_mat.device(context.GetEigenDevice<platform::CPUPlace>()) =
+          logit_grad_mat *
+          out_grad_mat.broadcast(Eigen::DSizes<int, 2>(1, class_num));
+
       const int batch_size = logit_grad->dims()[0];
-      const int* label_data = labels->data<int>();
-      const T* out_grad_data = out_grad->data<T>();
+      const int64_t* label_data = labels->data<int64_t>();
       T* logit_grad_data = logit_grad->data<T>();
-
+      const T* out_grad_data = out_grad->data<T>();
       for (int i = 0; i < batch_size; ++i) {
-        int index = i * class_num + label_data[i];
-        logit_grad_data[index] =
-            out_grad_data[i] * (logit_grad_data[index] - 1.);
+        logit_grad_data[i * class_num + label_data[i]] -= out_grad_data[i];
       }
     }
   }
diff --git a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py
index f93feb20696..c2f07f9096c 100644
--- a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py
+++ b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py
@@ -12,30 +12,30 @@ class TestSoftmaxWithCrossEntropyOp(OpTest):
 
     def setUp(self):
         self.op_type = "softmax_with_cross_entropy"
-        batch_size = 3
+        batch_size = 2
         class_num = 37
 
         logits = np.random.uniform(0.1, 1.0,
-                                   [batch_size, class_num]).astype("float32")
+                                   [batch_size, class_num]).astype("float64")
         softmax = np.apply_along_axis(stable_softmax, 1, logits)
-        labels = np.random.randint(0, class_num, [batch_size, 1], dtype="int32")
+        labels = np.random.randint(0, class_num, [batch_size, 1], dtype="int64")
 
         cross_entropy = np.asmatrix(
             [[-np.log(softmax[i][labels[i][0]])]
              for i in range(softmax.shape[0])],
-            dtype="float32")
+            dtype="float64")
 
         self.inputs = {"Logits": logits, "Label": labels}
         self.outputs = {
-            "Softmax": softmax.astype('float32'),
-            "Loss": cross_entropy.astype('float32')
+            "Softmax": softmax.astype("float64"),
+            "Loss": cross_entropy.astype("float64")
         }
 
     def test_check_output(self):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(["Logits"], "Loss", max_relative_error=0.05)
+        self.check_grad(["Logits"], "Loss")
 
 
 class TestSoftmaxWithCrossEntropyOp2(OpTest):
@@ -49,19 +49,19 @@ class TestSoftmaxWithCrossEntropyOp2(OpTest):
         class_num = 37
 
         logits = np.random.uniform(0.1, 1.0,
-                                   [batch_size, class_num]).astype("float32")
+                                   [batch_size, class_num]).astype("float64")
         softmax = np.apply_along_axis(stable_softmax, 1, logits)
         labels = np.random.uniform(0.1, 1.0,
-                                   [batch_size, class_num]).astype("float32")
+                                   [batch_size, class_num]).astype("float64")
         labels /= np.sum(labels, axis=1, keepdims=True)
 
         cross_entropy = (-labels * np.log(softmax)).sum(
-            axis=1, keepdims=True).astype("float32")
+            axis=1, keepdims=True).astype("float64")
 
         self.inputs = {"Logits": logits, "Label": labels}
         self.outputs = {
-            "Softmax": softmax.astype('float32'),
-            "Loss": cross_entropy.astype('float32')
+            "Softmax": softmax.astype("float64"),
+            "Loss": cross_entropy.astype("float64")
         }
         self.attrs = {"soft_label": True}
 
@@ -69,9 +69,8 @@ class TestSoftmaxWithCrossEntropyOp2(OpTest):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(["Logits"], "Loss", max_relative_error=0.05)
+        self.check_grad(["Logits"], "Loss")
 
 
 if __name__ == "__main__":
-    exit(0)  # FIXME: xe has bug
     unittest.main()
-- 
GitLab
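
For reference, and not part of the patch itself: every kernel change above implements
the same identity. For softmax cross entropy, the gradient of the loss with respect to
the logits is softmax - one_hot(label) for hard labels, and softmax - label for
normalized soft labels, in both cases scaled by the incoming loss gradient. The old
CUDA soft-label line multiplied the wrong factors, and the old CPU hard-label path
left the upstream gradient out of the non-label entries. Below is a minimal,
self-contained NumPy sketch (illustrative only; the names and this local
stable_softmax are not from the patch) that checks both forms against finite
differences:

    import numpy as np

    def stable_softmax(x):
        # Subtract the row-wise max first so np.exp cannot overflow.
        shifted = x - x.max(axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / exps.sum(axis=1, keepdims=True)

    rng = np.random.RandomState(0)
    batch_size, class_num = 2, 5
    logits = rng.uniform(0.1, 1.0, (batch_size, class_num))
    softmax = stable_softmax(logits)
    eps = 1e-6

    # Hard labels: loss_i = -log(softmax[i, label_i]). The analytic gradient
    # is softmax - one_hot(label), i.e. subtract 1 at the label index, which
    # is what the patched CPU loop and CUDA kernel compute (times out_grad).
    labels = rng.randint(0, class_num, batch_size)
    grad_hard = softmax.copy()
    grad_hard[np.arange(batch_size), labels] -= 1.0
    for i in range(batch_size):
        for j in range(class_num):
            bumped = logits.copy()
            bumped[i, j] += eps
            delta = (-np.log(stable_softmax(bumped)[i, labels[i]])
                     + np.log(softmax[i, labels[i]]))
            assert abs(delta / eps - grad_hard[i, j]) < 1e-4

    # Soft labels: loss_i = -sum_j label_ij * log(softmax_ij). Since each
    # label row sums to one, the gradient reduces to softmax - label,
    # matching the corrected line loss_grad[row] * (logit_grad - labels)
    # rather than the old, wrong logit_grad * (loss_grad - labels).
    soft = rng.uniform(0.1, 1.0, (batch_size, class_num))
    soft /= soft.sum(axis=1, keepdims=True)
    grad_soft = softmax - soft
    for i in range(batch_size):
        for j in range(class_num):
            bumped = logits.copy()
            bumped[i, j] += eps
            delta = (-(soft[i] * np.log(stable_softmax(bumped)[i])).sum()
                     + (soft[i] * np.log(softmax[i])).sum())
            assert abs(delta / eps - grad_soft[i, j]) < 1e-4

With the gradient computed exactly, the updated unit tests can check in float64
without the relaxed max_relative_error, which is presumably why the disabled
exit(0) escape hatch is removed as well.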