Commit f188e22b authored by D dangqingqing

Remove Set functor and add compare_grad test

Parent a8d072c7
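This commit deletes the hand-rolled Set functor (and the math_functor library that housed it) in favor of zeroing gradient tensors directly with Eigen, and extends the Python gradient checker so compare_grad can skip variables via a no_grad_set. A minimal sketch of the substitution the C++ hunks below make, with Place standing in for CPUPlace or GPUPlace:

// Before: zero-fill through the dedicated functor from the math_functor library.
// functor::Set<platform::Place, T>()(static_cast<T>(0), d_table_t, device_context);
// After: flatten the tensor and assign a constant expression on the kernel's device.
auto t = framework::EigenVector<T>::Flatten(*d_table_t);
t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));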
paddle/operators/CMakeLists.txt
@@ -42,7 +42,6 @@ function(op_library TARGET)
endfunction()
add_subdirectory(math)
-add_subdirectory(functor)
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
@@ -69,4 +68,4 @@ op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
DEPS framework_proto tensor op_registry operator net_op)
op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu)
-op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu DEPS math_functor)
+op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu)
paddle/operators/fill_zeros_like_op.h
@@ -26,7 +26,7 @@ class FillZerosLikeKernel : public framework::OpKernel {
auto* output = context.Output<framework::Tensor>("Dst");
output->mutable_data<T>(context.GetPlace());
auto t = framework::EigenVector<T>::Flatten(*output);
-t.device(context.GetEigenDevice<Place>()) = t.constant(T(0));
+t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
}
};
......
deleted: paddle/operators/functor/CMakeLists.txt
if(WITH_GPU)
nv_library(math_functor SRCS math_functor.cc math_functor.cu DEPS device_context)
else()
cc_library(math_functor SRCS math_functor.cc DEPS device_context)
endif()
deleted: paddle/operators/functor/math_functor.cc
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/functor/math_functor.h"
#include "paddle/framework/eigen.h"
namespace paddle {
namespace operators {
namespace functor {
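// Set fills every element of tensor Y with the scalar alpha; this CPU
// specialization short-circuits alpha == 0 with memset and otherwise falls
// back to Eigen's setConstant.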
template <typename T>
struct Set<platform::CPUPlace, T> {
void operator()(const T alpha, framework::Tensor* Y,
platform::DeviceContext* context) {
int N = product(Y->dims());
T* YData = Y->mutable_data<T>(context->GetPlace());
if (alpha == static_cast<T>(0)) {
memset(YData, 0, N * sizeof(T));
} else {
framework::EigenVector<T, Eigen::RowMajor, Eigen::DenseIndex>::Flatten(*Y)
.setConstant(alpha);
}
}
};
template struct Set<platform::CPUPlace, float>;
template struct Set<platform::CPUPlace, double>;
} // namespace functor
} // namespace operators
} // namespace paddle
deleted: paddle/operators/functor/math_functor.cu
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/functor/math_functor.h"
#include "paddle/platform/cuda_helper.h"
namespace paddle {
namespace operators {
namespace functor {
template <typename T>
__global__ void SetKernel(const int N, const T alpha, T* Y) {
CUDA_1D_KERNEL_LOOP(i, N) { Y[i] = alpha; }
}
template <typename T>
struct Set<platform::GPUPlace, T> {
void operator()(const T alpha, framework::Tensor* Y,
platform::DeviceContext* context) {
int N = product(Y->dims());
T* YData = Y->mutable_data<T>(context->GetPlace());
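// Launch enough 512-thread blocks to cover all N elements;
// (N + 512 - 1) / 512 is ceiling division.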
SetKernel<<<(N + 512 - 1) / 512, 512>>>(N, alpha, YData);
}
};
template struct Set<platform::GPUPlace, float>;
template struct Set<platform::GPUPlace, double>;
} // namespace functor
} // namespace operators
} // namespace paddle
deleted: paddle/operators/functor/math_functor.h
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
namespace paddle {
namespace operators {
namespace functor {
template <typename Place, typename T>
struct Set {
void operator()(const T alpha, paddle::framework::Tensor* Y,
paddle::platform::DeviceContext* context);
};
} // namespace functor
} // namespace operators
} // namespace paddle
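For reference, the call sites removed in the lookup_table hunks below invoked this functor as follows (a sketch; d_table_t and device_context name the gradient tensor and device context visible in those hunks):

functor::Set<paddle::platform::GPUPlace, T>()(static_cast<T>(0), d_table_t,
device_context);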
paddle/operators/lookup_table_op.cu
@@ -12,8 +12,8 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/functor/math_functor.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/cuda_helper.h"
@@ -22,11 +22,11 @@ namespace operators {
using Tensor = framework::Tensor;
-template <typename T, int blockDimX, int blockDimY, int gridDimX>
+template <typename T, int BlockDimX, int BlockDimY, int GridDimX>
__global__ void LookupTable(T* output, const T* table, const int32_t* ids,
const int N, const int K, const int D) {
int idx = threadIdx.x;
-int idy = blockIdx.x + threadIdx.y * gridDimX;
+int idy = blockIdx.x + threadIdx.y * GridDimX;
while (idy < K) {
int id = ids[idy];
@@ -34,18 +34,18 @@ __global__ void LookupTable(T* output, const T* table, const int32_t* ids,
PADDLE_ASSERT(id < N);
T* out = output + idy * D;
const T* tab = table + id * D;
-for (int i = idx; i < D; i += blockDimX) {
+for (int i = idx; i < D; i += BlockDimX) {
out[i] = tab[i];
}
-idy += blockDimY * gridDimX;
+idy += BlockDimY * GridDimX;
}
}
-template <typename T, int blockDimX, int blockDimY, int gridDimX>
+template <typename T, int BlockDimX, int BlockDimY, int GridDimX>
__global__ void LookupTableGrad(T* table, const T* output, const int32_t* ids,
const int N, const int K, const int D) {
int idx = threadIdx.x;
-int idy = blockIdx.x + threadIdx.y * gridDimX;
+int idy = blockIdx.x + threadIdx.y * GridDimX;
while (idy < K) {
int id = ids[idy];
@@ -53,10 +53,10 @@ __global__ void LookupTableGrad(T* table, const T* output, const int32_t* ids,
PADDLE_ASSERT(id < N);
const T* out = output + idy * D;
T* tab = table + id * D;
-for (int i = idx; i < D; i += blockDimX) {
+for (int i = idx; i < D; i += BlockDimX) {
paddle::platform::CudaAtomicAdd(&tab[i], out[i]);
}
-idy += blockDimY * gridDimX;
+idy += BlockDimY * GridDimX;
}
}
@@ -96,10 +96,10 @@ class LookupTableGradCUDAKernel : public framework::OpKernel {
const T* d_output = d_output_t->data<T>();
T* d_table = d_table_t->mutable_data<T>(context.GetPlace());
-auto* device_context =
-const_cast<platform::DeviceContext*>(context.device_context_);
-functor::Set<paddle::platform::GPUPlace, T>()(static_cast<T>(0), d_table_t,
-device_context);
+auto t = framework::EigenVector<T>::Flatten(*d_table_t);
+t.device(context.GetEigenDevice<platform::GPUPlace>()) =
+t.constant(static_cast<T>(0));
dim3 threads(128, 8);
dim3 grids(8, 1);
LookupTableGrad<T, 128, 8, 8><<<grids, threads>>>(d_table, d_output, ids, N,
......
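A note on the launch geometry kept above, read off the kernel body rather than any external docs:

// threads = dim3(128, 8) and grids = dim3(8, 1) match the <T, 128, 8, 8> template
// arguments: threadIdx.x walks each D-wide embedding row in steps of BlockDimX = 128,
// while blockIdx.x and threadIdx.y together form BlockDimY * GridDimX = 64 walkers
// that stride through the K ids in steps of 64.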
paddle/operators/lookup_table_op.h
@@ -14,8 +14,8 @@
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/functor/math_functor.h"
namespace paddle {
namespace operators {
@@ -57,10 +57,10 @@ class LookupTableGradKernel : public framework::OpKernel {
const T* d_output = d_output_t->data<T>();
T* d_table = d_table_t->mutable_data<T>(context.GetPlace());
-auto* device_context =
-const_cast<platform::DeviceContext*>(context.device_context_);
-functor::Set<paddle::platform::CPUPlace, T>()(static_cast<T>(0), d_table_t,
-device_context);
+auto t = framework::EigenVector<T>::Flatten(*d_table_t);
+t.device(context.GetEigenDevice<platform::CPUPlace>()) =
+t.constant(static_cast<T>(0));
for (size_t i = 0; i < product(ids_t->dims()); ++i) {
PADDLE_ENFORCE_LT(ids[i], N);
PADDLE_ENFORCE_GE(ids[i], 0);
......
paddle/platform/cuda_helper.h
@@ -18,10 +18,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
-#define CUDA_1D_KERNEL_LOOP(i, n) \
-for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
-i += blockDim.x * gridDim.x)
#define CUDA_ATOMIC_WRAPPER(op, T) \
__device__ __forceinline__ T CudaAtomic##op(T* address, const T val)
......
python/paddle/v2/framework/tests/gradient_checker.py
@@ -23,6 +23,10 @@ def grad_var_name(var_name):
    return var_name + "@GRAD"

+def empty_var_name():
+    return "@EMPTY@"

def get_numeric_gradient(op,
                         input_values,
                         output_name,
@@ -171,7 +175,7 @@ class GradientChecker(unittest.TestCase):
        ]
        return outs

-    def compare_grad(self, forward_op, input_value):
+    def compare_grad(self, forward_op, input_value, no_grad_set=None):
        """ Compare the input gradients between CPU and GPU for the given forward
        operator.
@@ -179,15 +183,20 @@
        :type forward_op: Operator
        :param input_value: input values.
        :type input_value: dict{string:numpy.array}
+        :param no_grad_set: the set of variable names without gradients.
+        :type no_grad_set: set of string
        :raises: AssertionError, if there is any different gradient value.
        """
-        backward_op = core.Operator.backward(forward_op, set())
+        if no_grad_set is None:
+            no_grad_set = set()
+        backward_op = core.Operator.backward(forward_op, no_grad_set)
        # Return if not compiled with GPU support or the GPU kernel is not implemented.
        if not (core.is_compile_gpu() and backward_op.support_gpu()):
            return

        outputs = backward_op.outputs()
        out_names = [item for k in outputs for item in outputs[k]]
+        out_names = filter(lambda x: x != empty_var_name(), out_names)
        cpu_grads = self.__get_gradient(forward_op, backward_op, input_value,
                                        out_names, core.CPUPlace())
        gpu_grads = self.__get_gradient(forward_op, backward_op, input_value,
......
python/paddle/v2/framework/tests/test_lookup_table.py
@@ -21,6 +21,8 @@ class TestSigmoidGradOp(GradientChecker):
        table = np.random.random((17, 31)).astype('float32')
        ids = np.random.randint(0, 17, 4).astype('int32')
        inputs = {'W': table, 'Ids': ids}
+        # compare gradients between CPU and GPU
+        self.compare_grad(op, inputs, set(['Ids']))
        # check gradients
        self.check_grad(op, inputs, set('W'), 'Out')
......