Commit 8f8ea005 authored by caoying03

fix implementations.

Parent 1fb5f12f

New file: paddle/operators/math/utils.h
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#include <cmath>        // INFINITY
#include <type_traits>  // std::is_floating_point

#include "paddle/platform/assert.h"
#include "paddle/platform/hostdevice.h"

namespace paddle {
namespace operators {
namespace math {

template <typename T>
HOSTDEVICE T tolerable_value(const T x) {
  PADDLE_ASSERT(std::is_floating_point<T>::value);
  const T kApproInf = 1e20;
  if (x == INFINITY) {
    return kApproInf;
  }
  if (x == -INFINITY) {
    return -kApproInf;
  }
  return x;
}
} // namespace math
} // namespace operators
} // namespace paddle
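
For intuition, a minimal NumPy sketch of what tolerable_value does: finite inputs pass through unchanged, while +inf/-inf (e.g. from log(0)) are clamped to +/-1e20 so they cannot poison the loss. The helper name np_tolerable_value is illustrative only, not part of the Paddle API.

import numpy as np

def np_tolerable_value(x, appro_inf=1e20):
    # Clamp +/-inf to a large finite value, mirroring math::tolerable_value.
    x = np.asarray(x, dtype=np.float64)
    return np.where(np.isposinf(x), appro_inf,
                    np.where(np.isneginf(x), -appro_inf, x))

with np.errstate(divide='ignore'):
    print(np_tolerable_value(np.log([1.0, 0.5, 0.0])))  # [ 0.     -0.6931 -1e+20]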
paddle/operators/cross_entropy_op.cu

@@ -13,6 +13,7 @@
 limitations under the License. */

 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/utils.h"
 #include "paddle/platform/assert.h"

 namespace paddle {
@@ -20,20 +21,6 @@ namespace operators {
 using Tensor = framework::Tensor;

-template <typename T>
-__host__ __device__ T clipping_log(const T x) {
-  PADDLE_ASSERT(std::is_floating_point<T>::value);
-  const T kApproInf = 1e20;
-  T v = log(x);
-  if (v == INFINITY) {
-    return kApproInf;
-  }
-  if (v == -INFINITY) {
-    return -kApproInf;
-  }
-  return v;
-}
-
 template <typename T>
 __global__ void CrossEntropyKernel(T* Y, const T* X, const int* label,
                                    const int N, const int D) {
@@ -42,7 +29,7 @@ __global__ void CrossEntropyKernel(T* Y, const T* X, const int* label,
   for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
        i += blockDim.x * gridDim.x) {
     PADDLE_ASSERT(label[i] >= 0 && label[i] < D);
-    Y[i] = -clipping_log(X[i * D + label[i]]);
+    Y[i] = -math::tolerable_value(log(X[i * D + label[i]]));
   }
 }
@@ -73,7 +60,7 @@ class OnehotCrossEntropyOpCUDAKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
-                   "It must use GPUPlace.");
+                   "This kernel only runs on GPU device.");
    auto X = ctx.Input<Tensor>("X");
    const T* Xdata = X->data<T>();
@@ -86,6 +73,7 @@ class OnehotCrossEntropyOpCUDAKernel : public framework::OpKernel {
    int D = X->dims()[1];
    int block = 512;
    int grid = (N + block - 1) / block;
    // TODO(qingqing) launch kernel on specified stream
    // base on ExecutionContext.
    CrossEntropyKernel<T><<<grid, block>>>(Ydata, Xdata, label_data, N, D);
...
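
The kernel change above makes each GPU thread compute -log of the probability that X assigns to the ground-truth class, with the log clamped by math::tolerable_value. A NumPy sketch of the same per-row computation (names and shapes are illustrative, not the operator's API):

import numpy as np

def onehot_cross_entropy(X, label, appro_inf=1e20):
    # X: (N, D) row-wise probabilities; label: (N,) integer class indices.
    N = X.shape[0]
    with np.errstate(divide='ignore'):
        logp = np.log(X[np.arange(N), label])
    # log(0) yields -inf; clamp it the way math::tolerable_value does.
    logp = np.where(np.isneginf(logp), -appro_inf, logp)
    return -logp  # per-example loss, shape (N,)

X = np.array([[0.7, 0.2, 0.1], [0.1, 0.1, 0.8]])
print(onehot_cross_entropy(X, np.array([0, 2])))  # [0.3567 0.2231]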
paddle/operators/softmax_with_cross_entropy_op.cc

@@ -32,7 +32,7 @@ class SoftmaxWithCrossEntropyOpMaker
              "Store the outputs of softmax function, "
              "which will be used in backward calculation.")
        .AsIntermediate();
-    AddOutput("Loss", "A 1-D tensor<float> with shape N.");
+    AddOutput("Out", "A 1-D tensor<float> with shape N.");
     AddComment(R"DOC(
Cross entropy loss with softmax is used extensively as the output layer. This
operator computes the softmax normalized values for each row of the input
@@ -56,14 +56,14 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext& ctx) const override {
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Loss")),
-                            "Input(Loss@Grad) should not be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
+                            "Input(Out@Grad) should not be null.");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Softmax"),
                             "Input(Softmax) should not be null.");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"),
                             "Input(Label) should not be null.");
-    ctx.Output<Tensor>(framework::GradVarName("Logits"))
+    ctx.Output<framework::LoDTensor>(framework::GradVarName("Logits"))
        ->Resize(ctx.Input<Tensor>("Softmax")->dims());
  }
};
@@ -81,8 +81,8 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE(ctx.Input<Tensor>("Label")->dims().size() == 1UL,
                   "The label should be a 1-d tensor.");
-    ctx.Output<Tensor>("Softmax")->Resize(logits->dims());
-    ctx.Output<Tensor>("Loss")->Resize({logits->dims()[0], 1});
+    ctx.Output<framework::LoDTensor>("Softmax")->Resize(logits->dims());
+    ctx.Output<framework::LoDTensor>("Out")->Resize({logits->dims()[0], 1});
  }
};
...
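
To summarize the shape contract enforced by the two InferShape methods above, sketched with arbitrary example sizes: Logits and Softmax are [batch_size, class_num], Label is 1-D with batch_size entries, and Out is [batch_size, 1].

import numpy as np

batch_size, class_num = 4, 5                              # example sizes only
logits  = np.random.rand(batch_size, class_num)           # Input("Logits")
label   = np.random.randint(0, class_num, (batch_size,))  # Input("Label"), 1-D
softmax = np.empty_like(logits)                           # Output("Softmax")
out     = np.empty((batch_size, 1))                       # Output("Out")
assert label.ndim == 1, "The label should be a 1-d tensor."
assert out.shape == (logits.shape[0], 1)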
paddle/operators/softmax_with_cross_entropy_op.cu

-/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve.
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,8 +13,97 @@
 limitations under the License. */

 #define EIGEN_USE_GPU
-#include "softmax_with_cross_entropy_op.h"
-
-namespace ops = paddle::operators;
-// TODO(caoying) add GPU kernel
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/softmax_function.h"
+#include "paddle/operators/math/utils.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename T>
+__global__ void CrossEntropyKernel(T* out, const T* softmax_out,
+                                   const int* label, const int batch_size,
+                                   const int class_num) {
+  int i = blockIdx.x * blockDim.x + threadIdx.x;
+  if (i >= batch_size) return;
+
+  PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num);
+  out[i] = -math::tolerable_value(log(softmax_out[i * class_num + label[i]]));
+}
+
+template <typename T>
+__global__ void CrossEntropyWithSoftmaxGradKernel(T* softmax_out,
+                                                  const int* label,
+                                                  const int batch_size,
+                                                  const int class_num) {
+  int i = blockIdx.x * blockDim.x + threadIdx.x;
+  if (i >= batch_size) return;
+
+  PADDLE_ASSERT(label[i] >= 0 && label[i] < class_num);
+  softmax_out[i * class_num + label[i]] -= 1.;
+}
+
+template <typename T>
+class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()),
+                   "This kernel only runs on GPU device.");
+
+    // Calculate the softmax outputs.
+    const Tensor* logits = context.Input<Tensor>("Logits");
+    Tensor* softmax = context.Output<Tensor>("Softmax");
+    softmax->mutable_data<T>(context.GetPlace());
+    math::SoftmaxFunctor<platform::GPUPlace, T>()(logits, softmax, context);
+    T* softmax_out = softmax->data<T>();
+
+    // Calculate the cross entropy loss based on hard labels.
+    const int* label_data = context.Input<Tensor>("Label")->data<int>();
+    Tensor* loss = context.Output<Tensor>("Out");
+    loss->mutable_data<T>(context.GetPlace());
+    T* loss_data = loss->data<T>();
+
+    const int batch_size = logits->dims()[0];
+    const int class_num = logits->dims()[1];
+    int block = 512;
+    int grid = (batch_size + block - 1) / block;
+
+    CrossEntropyKernel<T><<<grid, block>>>(loss_data, softmax_out, label_data,
+                                           batch_size, class_num);
+  }
+};
+
+template <typename T>
+class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()),
+                   "This kernel only runs on GPU device.");
+
+    Tensor* logit_grad =
+        context.Output<Tensor>(framework::GradVarName("Logits"));
+    logit_grad->ShareDataWith<T>(*context.Input<Tensor>("Softmax"));
+    T* logit_grad_data = logit_grad->data<T>();
+
+    const int batch_size = logit_grad->dims()[0];
+    const int class_num = logit_grad->dims()[1];
+    const int* label_data = context.Input<Tensor>("Label")->data<int>();
+
+    const int block = 512;
+    const int grid = (batch_size + block - 1) / block;
+
+    CrossEntropyWithSoftmaxGradKernel<T><<<grid, block>>>(
+        logit_grad_data, label_data, batch_size, class_num);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy,
+                       ops::SoftmaxWithCrossEntropyCUDAKernel<float>);
+REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy_grad,
+                       ops::SoftmaxWithCrossEntropyGradCUDAKernel<float>);
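
Taken together, the two device kernels implement: a softmax over each row of Logits, then the per-example loss -log(softmax[i, label[i]]). A NumPy reference of the forward pass (a sketch under the shapes used above, not the Paddle API):

import numpy as np

def softmax_with_cross_entropy_forward(logits, label):
    # logits: (batch_size, class_num); label: (batch_size,) integer indices.
    shifted = logits - logits.max(axis=1, keepdims=True)  # numerical stability
    e = np.exp(shifted)
    softmax = e / e.sum(axis=1, keepdims=True)            # what SoftmaxFunctor produces
    rows = np.arange(label.shape[0])
    loss = -np.log(softmax[rows, label])                  # what CrossEntropyKernel produces
    return softmax, loss.reshape(-1, 1)                   # Out has shape (batch_size, 1)

logits = np.array([[2.0, 1.0, 0.1], [0.5, 2.5, 0.3]])
softmax, loss = softmax_with_cross_entropy_forward(logits, np.array([0, 1]))
print(loss.ravel())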
paddle/operators/softmax_with_cross_entropy_op.h

@@ -30,8 +30,7 @@ template <typename T>
 class SoftmaxWithCrossEntropyKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
-    auto place = context.GetPlace();
-    PADDLE_ENFORCE(platform::is_cpu_place(place),
+    PADDLE_ENFORCE(platform::is_cpu_place(context.GetPlace()),
                    "This kernel only runs on CPU.");

     // Calculate the softmax outputs.
@@ -45,7 +44,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel {
     T* softmax_out = softmax->data<T>();
     const int* label_data = context.Input<Tensor>("Label")->data<int>();

-    Tensor* loss = context.Output<Tensor>("Loss");
+    Tensor* loss = context.Output<Tensor>("Out");
     loss->mutable_data<T>(context.GetPlace());
     T* loss_data = loss->data<T>();
@@ -74,7 +73,7 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel {
     const int* label_data = context.Input<Tensor>("Label")->data<int>();
     for (int i = 0; i < batch_size; ++i) {
       int index = i * class_num + label_data[i];
-      logit_grad_data[index] -= .1;
+      logit_grad_data[index] -= 1.;
     }
  }
};
...
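
Both backward kernels (GPU above, CPU here) rely on the identity d(loss_i)/d(logits_i) = softmax_i - onehot(label_i): the gradient buffer starts as a copy of the softmax output and exactly 1.0 is subtracted at the true-class column, which is what the ".1" -> "1." fix restores. A NumPy sketch of that identity:

import numpy as np

def softmax_with_cross_entropy_grad(softmax, label):
    # softmax: (batch_size, class_num) forward output; label: (batch_size,) ints.
    logit_grad = softmax.copy()
    logit_grad[np.arange(label.shape[0]), label] -= 1.0   # subtract 1., not .1
    return logit_grad

softmax = np.array([[0.6, 0.3, 0.1], [0.2, 0.7, 0.1]])
print(softmax_with_cross_entropy_grad(softmax, np.array([0, 1])))
# [[-0.4  0.3  0.1]
#  [ 0.2 -0.3  0.1]]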
test_cross_entropy_op.py

 import unittest
 import numpy
 from op_test import OpTest
-import pdb

 class TestCrossEntropy(OpTest):
...
test_softmax_with_cross_entropy_op.py

 import unittest
 import numpy as np
-import pdb
 from op_test import OpTest
 from test_softmax_op import stable_softmax
@@ -11,7 +10,7 @@ class TestSoftmaxWithCrossEntropyOp(OpTest):
         self.op_type = "softmax_with_cross_entropy"

         MAX_BATCH_SIZE = 23
-        MAX_CLASS_NUM = 10
+        MAX_CLASS_NUM = 17
         batch_size = np.random.randint(1, MAX_BATCH_SIZE, 1)[0]
         class_num = np.random.randint(2, MAX_CLASS_NUM, 1)[0]
@@ -26,13 +25,13 @@ class TestSoftmaxWithCrossEntropyOp(OpTest):
             dtype="float32")

         self.inputs = {"Logits": logits, "Label": labels}
-        self.outputs = {"Softmax": softmax, "Loss": cross_entropy}
+        self.outputs = {"Softmax": softmax, "Out": cross_entropy}

     def test_check_output(self):
         self.check_output()

     def test_check_grad(self):
-        self.check_grad(["Logits"], "Loss")
+        self.check_grad(["Logits"], "Out", max_relative_error=0.05)

 if __name__ == "__main__":
...
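
check_grad compares the operator's analytic gradient against a numerically estimated one; max_relative_error=0.05 loosens the tolerance because single-precision softmax plus log is not very accurate. A hedged sketch of such a central-difference check (num_grad_check is an illustrative helper, not the actual op_test internals):

import numpy as np

def num_grad_check(f, x, analytic_grad, delta=1e-3, max_relative_error=0.05):
    # f maps an ndarray to a scalar loss; compare df/dx with analytic_grad.
    numeric = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + delta
        plus = f(x)
        x[idx] = orig - delta
        minus = f(x)
        x[idx] = orig                      # restore the perturbed entry
        numeric[idx] = (plus - minus) / (2.0 * delta)
        it.iternext()
    rel_err = np.abs(numeric - analytic_grad) / np.maximum(np.abs(numeric), 1e-8)
    assert rel_err.max() <= max_relative_error, rel_err.max()

x = np.random.rand(3, 4)
num_grad_check(lambda z: (z ** 2).sum(), x, 2 * x)  # gradient of sum(z^2) is 2z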