delete redundant codes.

201c2bcf · caoying03 · 6735585b · 201c2bcf · 201c2bcf
Showing with 18 addition and 77 deletion

paddle/operators/cross_entropy_op.cu paddle/operators/cross_entropy_op.cu +10 -45

python/paddle/v2/framework/tests/test_cross_entropy_op.py python/paddle/v2/framework/tests/test_cross_entropy_op.py +8 -32

未找到文件。
--- a/paddle/operators/cross_entropy_op.cu
+++ b/paddle/operators/cross_entropy_op.cu
@@ -42,10 +42,9 @@ __device__ __forceinline__ T sum_single_warp(T val) {
  return val;
 }

-// This kernel is called when the class number is less than or equal to 512.
 template <typename T>
-__global__ void SoftCrossEntropyKernel1(T* Y, const T* X, const T* label,
-                                        const int class_num) {
+__global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label,
+                                       const int class_num) {
  int tid = threadIdx.x;
  extern __shared__ T d_sum[];
  d_sum[tid] = 0;
@@ -69,33 +68,6 @@ __global__ void SoftCrossEntropyKernel1(T* Y, const T* X, const T* label,
  if (tid == 0) Y[blockIdx.x] = -val;
 }

-// This kernel is called when the class number is larger than 512.
-template <typename T, int BlockSize>
-__global__ void SoftCrossEntropyKernel2(T* Y, const T* X, const T* label,
-                                        const int class_num) {
-  int tid = threadIdx.x;
-  __shared__ T d_sum[BlockSize];
-  int next_idx = blockIdx.x * class_num + tid;
-
-  d_sum[tid] = 0;
-  int cur_idx = tid;
-  while (cur_idx < class_num) {
-    d_sum[tid] += TolerableValue<T>()(std::log(X[next_idx])) * label[next_idx];
-    next_idx += BlockSize;
-    cur_idx += BlockSize;
-  }
-  __syncthreads();
-
-  for (unsigned int stride = BlockSize >> 1; stride >= 32; stride >>= 1) {
-    if (tid < stride) d_sum[tid] += d_sum[tid + stride];
-    __syncthreads();
-  }
-
-  T val = d_sum[tid];
-  val = sum_single_warp<T>(val);
-  if (tid == 0) Y[blockIdx.x] = -val;
-}
-
 // TODO(qingqing): make zero setting a common function.
 template <typename T>
 __global__ void zero(T* X, const int N) {
@@ -146,26 +118,19 @@ class CrossEntropyOpCUDAKernel : public framework::OpKernel {

    int batch_size = x->dims()[0];
    int class_num = x->dims()[1];
-    int block = 512;

    if (ctx.Attr<bool>("soft_label")) {
      auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
-      if (class_num > 512) {
-        SoftCrossEntropyKernel2<
-            T, 512><<<batch_size, block, 0,
-                      reinterpret_cast<const platform::CUDADeviceContext&>(
-                          ctx.device_context())
-                          .stream()>>>(y_data, x_data, label_data, class_num);
-      } else {
-        int block_size = pow(2, int(std::log2(class_num)));
-        SoftCrossEntropyKernel1<
-            T><<<batch_size, block_size, block_size * sizeof(T),
-                 reinterpret_cast<const platform::CUDADeviceContext&>(
-                     ctx.device_context())
-                     .stream()>>>(y_data, x_data, label_data, class_num);
-      }
+      int block = class_num > 512 ? 512 : pow(2, int(std::log2(class_num)));
+
+      SoftCrossEntropyKernel<
+          T><<<batch_size, block, block * sizeof(T),
+               reinterpret_cast<const platform::CUDADeviceContext&>(
+                   ctx.device_context())
+                   .stream()>>>(y_data, x_data, label_data, class_num);
    } else {
      auto* label_data = ctx.Input<Tensor>("Label")->data<int>();
+      int block = 512;
      int grid = (batch_size + block - 1) / block;
      CrossEntropyKernel<T><<<
          grid, block, 0, reinterpret_cast<const platform::CUDADeviceContext&>(

--- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py
+++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
@@ -4,19 +4,21 @@ from op_test import OpTest


 class TestCrossEntropyOp1(OpTest):
-    """Test standard cross-entropy, with index representation of labels.
+    """Test cross-entropy with discrete one-hot labels.
    """

    def setUp(self):
        self.op_type = "cross_entropy"
        batch_size = 30
        class_num = 10
+
        X = np.random.uniform(0.1, 1.0,
                              [batch_size, class_num]).astype("float32")
        label = np.random.randint(0, class_num, (batch_size, 1), dtype="int32")
        cross_entropy = np.asmatrix(
            [[-np.log(X[i][label[i][0]])] for i in range(X.shape[0])],
            dtype="float32")
+
        self.inputs = {"X": X, "Label": label}
        self.outputs = {"Y": cross_entropy}
        self.attrs = {"soft_label": False}
@@ -29,14 +31,14 @@ class TestCrossEntropyOp1(OpTest):


 class TestCrossEntropyOp2(OpTest):
-    """Test soft-label cross-entropy, with vecterized soft labels.
+    """Test cross-entropy with vectorized soft labels.
    """

    def setUp(self):
        self.op_type = "cross_entropy"
        batch_size = 5
-        # this setting tests threads in more than one wrap.
        class_num = 37
+
        X = np.random.uniform(0.1, 1.0,
                              [batch_size, class_num]).astype("float32")
        label = np.random.uniform(0.1, 1.0,
@@ -44,6 +46,7 @@ class TestCrossEntropyOp2(OpTest):
        label /= label.sum(axis=1, keepdims=True)
        cross_entropy = (-label * np.log(X)).sum(
            axis=1, keepdims=True).astype("float32")
+
        self.inputs = {"X": X, "Label": label}
        self.outputs = {"Y": cross_entropy}
        self.attrs = {"soft_label": True}
@@ -56,15 +59,14 @@ class TestCrossEntropyOp2(OpTest):


 class TestCrossEntropyOp3(OpTest):
-    """Test one-hot cross-entropy, with vecterized one-hot representation of
-    labels.
+    """Test cross-entropy with vectorized one-hot representation of labels.
    """

    def setUp(self):
        self.op_type = "cross_entropy"
        batch_size = 5
-        # this setting tests all threads in one wrap.
        class_num = 17
+
        X = np.random.uniform(0.1, 1.0,
                              [batch_size, class_num]).astype("float32")
        label_index = np.random.randint(
@@ -76,33 +78,7 @@ class TestCrossEntropyOp3(OpTest):
            dtype="float32")
        cross_entropy2 = (-label * np.log(X)).sum(
            axis=1, keepdims=True).astype("float32")
-        self.inputs = {"X": X, "Label": label}
-        self.outputs = {"Y": cross_entropy}
-        self.attrs = {"soft_label": True}
-
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(["X"], "Y", max_relative_error=0.05)
-
-
-class TestCrossEntropyOp4(OpTest):
-    """Test soft-label cross-entropy.
-    This unittest tests the gpu kernel for layer size excesses 512.
-    """

-    def setUp(self):
-        self.op_type = "cross_entropy"
-        batch_size = 2
-        class_num = 517
-        X = np.random.uniform(0.1, 1.0,
-                              [batch_size, class_num]).astype("float32")
-        label = np.random.uniform(0.1, 1.0,
-                                  [batch_size, class_num]).astype("float32")
-        label /= label.sum(axis=1, keepdims=True)
-        cross_entropy = (-label * np.log(X)).sum(
-            axis=1, keepdims=True).astype("float32")
        self.inputs = {"X": X, "Label": label}
        self.outputs = {"Y": cross_entropy}
        self.attrs = {"soft_label": True}