Commit 0957fa7b, authored by qijun

fix relu functor and revert some codes

Parent c18ebc30
@@ -22,14 +22,14 @@ namespace framework {
 template <>
 Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
     platform::CPUPlace, Eigen::DefaultDevice>() const {
-  return *device_context_->get_eigen_device<platform::CPUPlace>();
+  return *device_context_->get_eigen_device<Eigen::DefaultDevice>();
 }

 #ifndef PADDLE_ONLY_CPU
 template <>
 Eigen::GpuDevice&
 ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
-  return *device_context_->get_eigen_device<platform::GPUPlace>();
+  return *device_context_->get_eigen_device<Eigen::GpuDevice>();
 }
 #endif
...
@@ -139,9 +139,9 @@ class OperatorBase {
 // Macro for define a clone method.
 // If you are writing an kernel operator, `Clone` will be defined when you
 // register it. i.e. `Clone` method is not needed to define by yourself.
-#define DEFINE_OP_CLONE_METHOD(cls)                                            \
-  std::unique_ptr<::paddle::framework::OperatorBase> Clone() const final {     \
-    return std::unique_ptr<::paddle::framework::OperatorBase>(new cls(*this)); \
+#define DEFINE_OP_CLONE_METHOD(cls)                       \
+  std::unique_ptr<OperatorBase> Clone() const final {     \
+    return std::unique_ptr<OperatorBase>(new cls(*this)); \
   }

 // Macro for define a default constructor for Operator.
@@ -331,6 +331,21 @@ class InferShapeContext {
   const Scope& scope_;
 };

+template <typename T>
+struct EigenDeviceConverter;
+
+template <>
+struct EigenDeviceConverter<platform::CPUPlace> {
+  using EigenDeviceType = Eigen::DefaultDevice;
+};
+
+#ifndef PADDLE_ONLY_CPU
+template <>
+struct EigenDeviceConverter<platform::GPUPlace> {
+  using EigenDeviceType = Eigen::GpuDevice;
+};
+#endif
+
 class ExecutionContext : public InferShapeContext {
  public:
   ExecutionContext(const OperatorBase& op, const Scope& scope,
@@ -338,8 +353,8 @@ class ExecutionContext : public InferShapeContext {
       : InferShapeContext(op, scope), device_context_(device_context) {}

   template <typename PlaceType,
-            typename DeviceType = typename platform::EigenDeviceConverter<
-                PlaceType>::EigenDeviceType>
+            typename DeviceType =
+                typename EigenDeviceConverter<PlaceType>::EigenDeviceType>
   DeviceType& GetEigenDevice() const;

   platform::Place GetPlace() const { return device_context_->GetPlace(); }
...
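For orientation (this note is not part of the diff): EigenDeviceConverter is a small trait that maps a Place type to the corresponding Eigen device type, and GetEigenDevice uses it as the default for its second template parameter, so a kernel only needs to name the place. A minimal compile-time illustration of the mapping, assuming <type_traits> is included and the code sits inside namespace paddle::framework next to the trait above:

// Illustration only: the trait resolves a place to its Eigen device type.
static_assert(
    std::is_same<EigenDeviceConverter<platform::CPUPlace>::EigenDeviceType,
                 Eigen::DefaultDevice>::value,
    "CPUPlace maps to Eigen::DefaultDevice");
#ifndef PADDLE_ONLY_CPU
static_assert(
    std::is_same<EigenDeviceConverter<platform::GPUPlace>::EigenDeviceType,
                 Eigen::GpuDevice>::value,
    "GPUPlace maps to Eigen::GpuDevice");
#endif
// A kernel can therefore write
//   auto& dev = ctx.GetEigenDevice<platform::CPUPlace>();  // Eigen::DefaultDevice&
// and the device type is deduced from the place.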
@@ -14,26 +14,6 @@

 #include "paddle/operators/activation_op.h"

-// #define FILL_ACTIVATION_OP                                                  \
-//  public:                                                                    \
-//   using framework::OperatorWithKernel::OperatorWithKernel;                  \
-//                                                                             \
-//  protected:                                                                 \
-//   void InferShape(const framework::InferShapeContext &ctx) const override { \
-//     ctx.Output<framework::Tensor>("Y")->Resize(                             \
-//         ctx.Input<framework::Tensor>("X")->dims());                         \
-//   }
-
-// #define FILL_ACTIVATION_GRAD_OP                                             \
-//  public:                                                                    \
-//   using framework::OperatorWithKernel::OperatorWithKernel;                  \
-//                                                                             \
-//  protected:                                                                 \
-//   void InferShape(const framework::InferShapeContext &ctx) const override { \
-//     ctx.Output<framework::Tensor>(framework::GradVarName("X"))              \
-//         ->Resize(ctx.Input<framework::Tensor>("Y")->dims());                \
-//   }
-
 namespace paddle {
 namespace operators {
@@ -59,10 +39,6 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
   }
 };

-// class SigmoidOp : public framework::OperatorWithKernel {
-//   FILL_ACTIVATION_OP
-// };
-
 class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   SigmoidOpMaker(framework::OpProto *proto,
@@ -74,14 +50,6 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
   }
 };

-// class SigmoidOpGrad : public framework::OperatorWithKernel {
-//   FILL_ACTIVATION_GRAD_OP
-// };
-
-// class ExpOp : public framework::OperatorWithKernel {
-//   FILL_ACTIVATION_OP
-// };
-
 class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   ExpOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
@@ -92,14 +60,6 @@ class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
   }
 };

-// class ExpOpGrad : public framework::OperatorWithKernel {
-//   FILL_ACTIVATION_GRAD_OP
-// };
-
-// class ReluOp : public framework::OperatorWithKernel {
-//   FILL_ACTIVATION_OP
-// };
-
 class ReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   ReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
@@ -110,36 +70,33 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker {
   }
 };

-// class ReluOpGrad : public framework::OperatorWithKernel {
-//   FILL_ACTIVATION_GRAD_OP
-// };
-
 }  // namespace operators
 }  // namespace paddle

 namespace ops = paddle::operators;

 REGISTER_OP(sigmoid, ops::ActivationOp, ops::SigmoidOpMaker, sigmoid_grad,
             ops::ActivationOpGrad);
-REGISTER_OP_CPU_KERNEL(
-    sigmoid,
-    ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::Sigmoid>);
-REGISTER_OP_CPU_KERNEL(sigmoid_grad,
-                       ops::ActivationGradKernel<paddle::platform::CPUPlace,
-                                                 float, ops::SigmoidGrad>);
+REGISTER_OP_CPU_KERNEL(sigmoid,
+                       ops::ActivationKernel<paddle::platform::CPUPlace, float,
+                                             ops::SigmoidFunctor>);
+REGISTER_OP_CPU_KERNEL(
+    sigmoid_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
+                                            ops::SigmoidGradFunctor>);

 REGISTER_OP(exp, ops::ActivationOp, ops::ExpOpMaker, exp_grad,
             ops::ActivationOpGrad);
-REGISTER_OP_CPU_KERNEL(
-    exp, ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::Exp>);
-REGISTER_OP_CPU_KERNEL(
-    exp_grad,
-    ops::ActivationGradKernel<paddle::platform::CPUPlace, float, ops::ExpGrad>);
-
-// REGISTER_OP(relu, ops::ActivationOp, ops::ReluOpMaker, relu_grad,
-//             ops::ActivationOpGrad);
-// REGISTER_OP_CPU_KERNEL(relu,
-//                        ops::ReluKernel<paddle::platform::CPUPlace, float,
-//                        ops::Relu>);
-// REGISTER_OP_CPU_KERNEL(relu_grad,
-//                        ops::ReluGradKernel<paddle::platform::CPUPlace, float,
-//                        ops::ReluGrad>);
+REGISTER_OP_CPU_KERNEL(
+    exp,
+    ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::ExpFunctor>);
+REGISTER_OP_CPU_KERNEL(exp_grad,
+                       ops::ActivationGradKernel<paddle::platform::CPUPlace,
+                                                 float, ops::ExpGradFunctor>);
+
+REGISTER_OP(relu, ops::ActivationOp, ops::ReluOpMaker, relu_grad,
+            ops::ActivationOpGrad);
+REGISTER_OP_CPU_KERNEL(relu,
+                       ops::ActivationKernel<paddle::platform::CPUPlace, float,
+                                             ops::ReluFunctor<float>>);
+REGISTER_OP_CPU_KERNEL(
+    relu_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
+                                         ops::ReluGradFunctor<float>>);

@@ -18,15 +18,21 @@
 namespace ops = paddle::operators;

-REGISTER_OP_GPU_KERNEL(sigmoid,
-                       ops::SigmoidKernel<paddle::platform::GPUPlace, float>);
-REGISTER_OP_GPU_KERNEL(
-    sigmoid_grad, ops::SigmoidGradKernel<paddle::platform::GPUPlace, float>);
-
-REGISTER_OP_GPU_KERNEL(exp, ops::ExpKernel<paddle::platform::GPUPlace, float>);
-
-REGISTER_OP_GPU_KERNEL(exp_grad,
-                       ops::ExpGradKernel<paddle::platform::GPUPlace, float>);
-
-REGISTER_OP_GPU_KERNEL(relu,
-                       ops::ReluKernel<paddle::platform::GPUPlace, float>);
-REGISTER_OP_GPU_KERNEL(relu_grad,
-                       ops::ReluGradKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(sigmoid,
+                       ops::ActivationKernel<paddle::platform::GPUPlace, float,
+                                             ops::SigmoidFunctor>);
+REGISTER_OP_GPU_KERNEL(
+    sigmoid_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
+                                            ops::SigmoidGradFunctor>);
+
+REGISTER_OP_GPU_KERNEL(
+    exp,
+    ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::ExpFunctor>);
+
+REGISTER_OP_GPU_KERNEL(exp_grad,
+                       ops::ActivationGradKernel<paddle::platform::GPUPlace,
+                                                 float, ops::ExpGradFunctor>);
+
+REGISTER_OP_GPU_KERNEL(relu,
+                       ops::ActivationKernel<paddle::platform::GPUPlace, float,
+                                             ops::ReluFunctor<float>>);
+REGISTER_OP_GPU_KERNEL(
+    relu_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
+                                         ops::ReluGradFunctor<float>>);

@@ -15,42 +15,6 @@
 #pragma once
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
-// #include "paddle/operators/math/activation_functor.h"
-
-// #define ACTIVATION_KERNEL_NAME(ACTIVATION_NAME) ACTIVATION_NAME##Kernel
-
-// #define DEFINE_ACTIVATION_KERNEL(ACTIVATION_NAME)                            \
-//   template <typename Place, typename T>                                      \
-//   class ACTIVATION_KERNEL_NAME(ACTIVATION_NAME) : public framework::OpKernel { \
-//    public:                                                                   \
-//     void Compute(const framework::ExecutionContext& context) const override { \
-//       auto* X = context.Input<framework::Tensor>("X");                       \
-//       auto* Y = context.Output<framework::Tensor>("Y");                      \
-//       Y->mutable_data<T>(context.GetPlace());                                \
-//       math::ACTIVATION_NAME<Place, T> functor;                               \
-//       auto* device_context = context.device_context();                       \
-//       functor(*device_context, *X, Y);                                       \
-//     }                                                                        \
-//   };
-
-// #define DEFINE_ACTIVATION_GRAD_KERNEL(ACTIVATION_GRAD_NAME)                  \
-//   template <typename Place, typename T>                                      \
-//   class ACTIVATION_KERNEL_NAME(ACTIVATION_GRAD_NAME)                         \
-//       : public framework::OpKernel {                                         \
-//    public:                                                                   \
-//     void Compute(const framework::ExecutionContext& context) const override { \
-//       auto* X = context.Input<framework::Tensor>("X");                       \
-//       auto* Y = context.Input<framework::Tensor>("Y");                       \
-//       auto* dY =                                                             \
-//           context.Input<framework::Tensor>(framework::GradVarName("Y"));     \
-//       auto* dX =                                                             \
-//           context.Output<framework::Tensor>(framework::GradVarName("X"));    \
-//       dX->mutable_data<T>(context.GetPlace());                               \
-//       math::ACTIVATION_GRAD_NAME<Place, T> functor;                          \
-//       auto* device_context = context.device_context();                       \
-//       functor(*device_context, *X, *Y, *dY, dX);                             \
-//     }                                                                        \
-//   };
-
 namespace paddle {
 namespace operators {
@@ -91,59 +55,49 @@ class ActivationGradKernel : public framework::OpKernel {
   }
 };

-struct Sigmoid {
+struct SigmoidFunctor {
   template <typename Device, typename X, typename Y>
   void operator()(Device d, X x, Y y) {
     y.device(d) = 1. / (1. + (-x).exp());
   }
 };

-struct SigmoidGrad {
+struct SigmoidGradFunctor {
   template <typename Device, typename X, typename Y, typename dY, typename dX>
   void operator()(Device d, X x, Y y, dY dy, dX dx) {
     dx.device(d) = dy * y * (1. - y);
   }
 };

-struct Exp {
+struct ExpFunctor {
   template <typename Device, typename X, typename Y>
   void operator()(Device d, X x, Y y) {
     y.device(d) = x.exp();
   }
 };

-struct ExpGrad {
+struct ExpGradFunctor {
   template <typename Device, typename X, typename Y, typename dY, typename dX>
   void operator()(Device d, X x, Y y, dY dy, dX dx) {
     dx.device(d) = y;
   }
 };

-// template <typename Device, typename X, typename Y>
-// struct Relu {
-//   void operator()(Device d, X x, Y y) {
-//     y.device(d) = x.cwiseMax(static_cast<T>(0));
-//   }
-// };
-
-// template <typename Device, typename X, typename Y, typename dY, typename dX>
-// struct ReluGrad {
-//   void operator()(Device d, X x, Y y, dY dy, dX dx) {
-//     dx.device(d) = dy * (x > static_cast<T>(0)).template cast<T>();
-//   }
-// };
-
-// DEFINE_ACTIVATION_KERNEL(Sigmoid);
-
-// DEFINE_ACTIVATION_GRAD_KERNEL(SigmoidGrad);
-
-// DEFINE_ACTIVATION_KERNEL(Exp);
-
-// DEFINE_ACTIVATION_GRAD_KERNEL(ExpGrad);
-
-// DEFINE_ACTIVATION_KERNEL(Relu);
-
-// DEFINE_ACTIVATION_GRAD_KERNEL(ReluGrad);
+template <typename T>
+struct ReluFunctor {
+  template <typename Device, typename X, typename Y>
+  void operator()(Device d, X x, Y y) {
+    y.device(d) = x.cwiseMax(static_cast<T>(0));
+  }
+};
+
+template <typename T>
+struct ReluGradFunctor {
+  template <typename Device, typename X, typename Y, typename dY, typename dX>
+  void operator()(Device d, X x, Y y, dY dy, dX dx) {
+    dx.device(d) = dy * (x > static_cast<T>(0)).template cast<T>();
+  }
+};

 }  // namespace operators
 }  // namespace paddle
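For orientation (this note is not part of the commit): these functors are consumed by ActivationKernel and ActivationGradKernel, whose definitions sit just above this hunk in the same header. A rough sketch of how the forward kernel is expected to drive a functor; the class name here is hypothetical and the body is assumed from the framework API visible elsewhere in this diff:

// Sketch only; the real ActivationKernel is defined earlier in this header.
template <typename Place, typename T, typename Functor>
class ActivationKernelSketch : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* X = context.Input<framework::Tensor>("X");
    auto* Y = context.Output<framework::Tensor>("Y");
    Y->mutable_data<T>(context.GetPlace());

    // Flatten tensors to 1-D Eigen views and fetch the Eigen device for Place.
    auto x = framework::EigenVector<T>::Flatten(*X);
    auto y = framework::EigenVector<T>::Flatten(*Y);
    auto place = context.GetEigenDevice<Place>();

    Functor functor;
    functor(place, x, y);  // e.g. ReluFunctor<T>: y.device(place) = x.cwiseMax(0)
  }
};

The gradient kernel would follow the same pattern with the X, Y, dY inputs and the dX output that the *GradFunctor signatures above expect.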
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/tensor.h"

namespace paddle {
namespace operators {
namespace math {

template <typename Place, typename T>
struct Sigmoid {
  void operator()(const platform::DeviceContext& device_context,
                  const framework::Tensor& X, framework::Tensor* Y) {
    auto x = framework::EigenVector<T>::Flatten(X);
    auto y = framework::EigenVector<T>::Flatten(*Y);
    auto* place = device_context.template get_eigen_device<Place>();
    y.device(*place) = 1. / (1. + (-x).exp());
  }
};

template <typename Place, typename T>
struct SigmoidGrad {
  void operator()(const platform::DeviceContext& device_context,
                  const framework::Tensor& X, const framework::Tensor& Y,
                  const framework::Tensor& dY, framework::Tensor* dX) {
    auto dx = framework::EigenVector<T>::Flatten(*dX);
    auto y = framework::EigenVector<T>::Flatten(Y);
    auto dy = framework::EigenVector<T>::Flatten(dY);
    auto* place = device_context.template get_eigen_device<Place>();
    dx.device(*place) = dy * y * (1. - y);
  }
};

template <typename Place, typename T>
struct Exp {
  void operator()(const platform::DeviceContext& device_context,
                  const framework::Tensor& input, framework::Tensor* output) {
    auto x = framework::EigenVector<T>::Flatten(input);
    auto y = framework::EigenVector<T>::Flatten(*output);
    auto* place = device_context.template get_eigen_device<Place>();
    y.device(*place) = x.exp();
  }
};

template <typename Place, typename T>
struct ExpGrad {
  void operator()(const platform::DeviceContext& device_context,
                  const framework::Tensor& X, const framework::Tensor& Y,
                  const framework::Tensor& dY, framework::Tensor* dX) {
    auto dx = framework::EigenVector<T>::Flatten(*dX);
    auto y = framework::EigenVector<T>::Flatten(Y);
    auto* place = device_context.template get_eigen_device<Place>();
    dx.device(*place) = y;
  }
};

template <typename Place, typename T>
struct Relu {
  void operator()(const platform::DeviceContext& device_context,
                  const framework::Tensor& input, framework::Tensor* output) {
    auto x = framework::EigenVector<T>::Flatten(input);
    auto y = framework::EigenVector<T>::Flatten(*output);
    auto* place = device_context.template get_eigen_device<Place>();
    y.device(*place) = x.cwiseMax(static_cast<T>(0));
  }
};

template <typename Place, typename T>
struct ReluGrad {
  void operator()(const platform::DeviceContext& device_context,
                  const framework::Tensor& X, const framework::Tensor& Y,
                  const framework::Tensor& dY, framework::Tensor* dX) {
    auto dx = framework::EigenVector<T>::Flatten(*dX);
    auto dy = framework::EigenVector<T>::Flatten(dY);
    auto x = framework::EigenVector<T>::Flatten(X);
    auto* place = device_context.template get_eigen_device<Place>();
    dx.device(*place) = dy * (x > static_cast<T>(0)).template cast<T>();
  }
};

}  // namespace math
}  // namespace operators
}  // namespace paddle
@@ -16,8 +16,8 @@ namespace paddle {
 namespace platform {

 template <>
-Eigen::DefaultDevice*
-DeviceContext::get_eigen_device<CPUPlace, Eigen::DefaultDevice>() const {
+Eigen::DefaultDevice* DeviceContext::get_eigen_device<Eigen::DefaultDevice>()
+    const {
   return reinterpret_cast<const CPUDeviceContext*>(this)->eigen_device();
 }

@@ -91,8 +91,7 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface {
 };

 template <>
-Eigen::GpuDevice* DeviceContext::get_eigen_device<GPUPlace, Eigen::GpuDevice>()
-    const {
+Eigen::GpuDevice* DeviceContext::get_eigen_device<Eigen::GpuDevice>() const {
   return reinterpret_cast<const CUDADeviceContext*>(this)->eigen_device();
 }
...
@@ -27,29 +27,12 @@ limitations under the License. */
 namespace paddle {
 namespace platform {

-template <typename T>
-struct EigenDeviceConverter;
-
-template <>
-struct EigenDeviceConverter<platform::CPUPlace> {
-  using EigenDeviceType = Eigen::DefaultDevice;
-};
-
-#ifndef PADDLE_ONLY_CPU
-template <>
-struct EigenDeviceConverter<platform::GPUPlace> {
-  using EigenDeviceType = Eigen::GpuDevice;
-};
-#endif
-
 class DeviceContext {
  public:
   virtual ~DeviceContext() {}
   virtual Place GetPlace() const = 0;

-  template <typename PlaceType,
-            typename DeviceType =
-                typename EigenDeviceConverter<PlaceType>::EigenDeviceType>
+  template <typename DeviceType>
   DeviceType* get_eigen_device() const;
 };
...
@@ -24,7 +24,7 @@ TEST(Device, Init) {
   for (int i = 0; i < count; i++) {
     DeviceContext* device_context = new CUDADeviceContext(GPUPlace(i));
     Eigen::GpuDevice* gpu_device =
-        device_context->template get_eigen_device<GPUPlace>();
+        device_context->template get_eigen_device<Eigen::GpuDevice>();
     ASSERT_NE(nullptr, gpu_device);
     delete device_context;
   }
...
@@ -56,7 +56,7 @@ USE_OP(sum);
 USE_OP(reshape);
 USE_OP(sigmoid);
 USE_OP(exp);
-// USE_OP(relu);
+USE_OP(relu);

 namespace paddle {
 namespace framework {
...