From b6c075527c9810457cb5ca1c5d04ba34a8c5e2a2 Mon Sep 17 00:00:00 2001
From: qijun
Date: Tue, 18 Jul 2017 20:14:57 +0800
Subject: [PATCH] implement some basic OpKernel

---
 paddle/operators/add_op.cc         |  5 ++---
 paddle/operators/add_op.cu         |  3 +--
 paddle/operators/mul_op.cc         |  2 +-
 paddle/operators/mul_op.cu         |  2 +-
 paddle/operators/mul_op.h          | 17 ++++++++++++---
 paddle/operators/rowwise_add_op.cc |  2 +-
 paddle/operators/rowwise_add_op.cu |  2 +-
 paddle/operators/rowwise_add_op.h  | 19 ++++++++++++++---
 paddle/operators/sigmoid_op.cc     |  3 ++-
 paddle/operators/sigmoid_op.cu     |  2 +-
 paddle/operators/sigmoid_op.h      | 12 ++++++++---
 paddle/operators/softmax_op.cc     |  5 ++++-
 paddle/operators/softmax_op.cu     |  2 +-
 paddle/operators/softmax_op.h      | 34 +++++++++++++++++++++++++++---
 14 files changed, 85 insertions(+), 25 deletions(-)

diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc
index 41d044cdb7..260c8064ac 100644
--- a/paddle/operators/add_op.cc
+++ b/paddle/operators/add_op.cc
@@ -53,6 +53,5 @@ The equation is: Out = X + Y
 } // namespace paddle
 
 REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker);
-typedef paddle::operators::AddKernel<::paddle::platform::CPUPlace, float>
-    AddKernel_CPU_float;
-REGISTER_OP_CPU_KERNEL(add_two, AddKernel_CPU_float);
+REGISTER_OP_CPU_KERNEL(
+    add_two, paddle::operators::AddKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu
index 0edf142ee4..2e5a755f92 100644
--- a/paddle/operators/add_op.cu
+++ b/paddle/operators/add_op.cu
@@ -1,6 +1,5 @@
 #include "paddle/operators/add_op.h"
 #include "paddle/framework/op_registry.h"
 
-typedef paddle::operators::AddKernel<::paddle::platform::GPUPlace, float> AddKernel_GPU_float;
 REGISTER_OP_GPU_KERNEL(add_two,
-                       AddKernel_GPU_float);
\ No newline at end of file
+                       paddle::operators::AddKernel<paddle::platform::GPUPlace, float>);
\ No newline at end of file
diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc
index 713b2a5dc8..7aa63961a0 100644
--- a/paddle/operators/mul_op.cc
+++ b/paddle/operators/mul_op.cc
@@ -57,4 +57,4 @@ The equation is: Out = X * Y
 REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker);
 
 REGISTER_OP_CPU_KERNEL(
-    mul, paddle::operators::MulKernel<paddle::platform::CPUPlace>);
+    mul, paddle::operators::MulKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu
index 201723df24..75f00e746c 100644
--- a/paddle/operators/mul_op.cu
+++ b/paddle/operators/mul_op.cu
@@ -17,4 +17,4 @@
 
 REGISTER_OP_GPU_KERNEL(mul, paddle::operators::MulKernel<paddle::platform
-                                                              ::GPUPlace>);
\ No newline at end of file
+                                                              ::GPUPlace, float>);
\ No newline at end of file
diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h
index ce8a0169e0..13e5b6a950 100644
--- a/paddle/operators/mul_op.h
+++ b/paddle/operators/mul_op.h
@@ -20,11 +20,22 @@
 namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class MulKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Mul kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
+    dim_pair[0].first = 1;
+    dim_pair[0].second = 0;
+
+    auto input0 = context.Input(0)->Get<framework::Tensor>();
+    auto input1 = context.Input(1)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    output->mutable_data<T>(context.GetPlace());
+
+    output->matrix<T>().device(*(context.GetEigenDevice<Place>())) =
+        input0.matrix<T>().contract(input1.matrix<T>(), dim_pair);
   }
 };
 } // namespace operators
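Note on MulKernel above: Eigen expresses the matrix product as a tensor contraction; IndexPair(1, 0) contracts dimension 1 of the left operand with dimension 0 of the right, which is exactly Out = X * Y. A minimal self-contained sketch of the same contraction, assuming only Eigen's unsupported Tensor module (the names and sizes below are illustrative, not from the patch):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> a(2, 3), b(3, 4);
  a.setRandom();
  b.setRandom();

  // Contract dim 1 of `a` with dim 0 of `b`: a plain matrix multiply.
  Eigen::array<Eigen::IndexPair<int>, 1> dim_pair = {Eigen::IndexPair<int>(1, 0)};
  Eigen::Tensor<float, 2> c = a.contract(b, dim_pair);

  std::cout << c.dimension(0) << " x " << c.dimension(1) << "\n";  // 2 x 4
  return 0;
}

Writing the product as a device()-targeted Eigen expression is what lets one kernel body serve both places; only the Place template parameter changes between the CPU and GPU registrations.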
diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc
index 414bafd046..567b058fd0 100644
--- a/paddle/operators/rowwise_add_op.cc
+++ b/paddle/operators/rowwise_add_op.cc
@@ -58,4 +58,4 @@ REGISTER_OP(rowwise_add,
             paddle::operators::RowWiseAddOpMaker);
 REGISTER_OP_CPU_KERNEL(
     rowwise_add,
-    paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace>);
+    paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu
index 2c4bfbf93a..58fe96a4a3 100644
--- a/paddle/operators/rowwise_add_op.cu
+++ b/paddle/operators/rowwise_add_op.cu
@@ -3,4 +3,4 @@
 
 REGISTER_OP_GPU_KERNEL(
     rowwise_add,
-    paddle::operators::RowWiseAddKernel<paddle::platform::GPUPlace>);
+    paddle::operators::RowWiseAddKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h
index 35f43e6376..f1d43002dc 100644
--- a/paddle/operators/rowwise_add_op.h
+++ b/paddle/operators/rowwise_add_op.h
@@ -19,11 +19,24 @@
 namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class RowWiseAddKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto in0 = context.Input(0)->Get<framework::Tensor>();
+    auto in1 = context.Input(1)->Get<framework::Tensor>();
+    auto* out = context.Output(0)->GetMutable<framework::Tensor>();
+
+    auto input = in0.matrix<T>();
+    auto bias = in1.vec<T>();
+    auto output = out->matrix<T>();
+
+    const int bias_size = bias.dimension(0);
+    const int rest_size = input.size() / bias_size;
+    Eigen::DSizes<int, 1> one_d(input.size());
+    Eigen::DSizes<int, 1> bcast(rest_size);
+    output.reshape(one_d).device(*(context.GetEigenDevice<Place>())) =
+        input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d);
   }
 };
 
diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc
index 45ae277c53..fa13f2c4f7 100644
--- a/paddle/operators/sigmoid_op.cc
+++ b/paddle/operators/sigmoid_op.cc
@@ -46,4 +46,5 @@
 REGISTER_OP(sigmoid, paddle::operators::SigmoidOp,
             paddle::operators::SigmoidOpMaker);
 REGISTER_OP_CPU_KERNEL(
-    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::CPUPlace>);
+    sigmoid,
+    paddle::operators::SigmoidKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu
index 79d5222348..59bba2729f 100644
--- a/paddle/operators/sigmoid_op.cu
+++ b/paddle/operators/sigmoid_op.cu
@@ -2,4 +2,4 @@
 #include <paddle/framework/op_registry.h>
 
 REGISTER_OP_GPU_KERNEL(
-    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace>);
+    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h
index 42173343f3..7995b75297 100644
--- a/paddle/operators/sigmoid_op.h
+++ b/paddle/operators/sigmoid_op.h
@@ -20,11 +20,17 @@
 namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class SigmoidKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto input = context.Input(0)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    output->mutable_data<T>(context.GetPlace());
+
+    output->flat<T>().device(*(context.GetEigenDevice<Place>())) =
+        1.0 / (1.0 + (-1.0 * input.flat<T>()).exp());
   }
 };
 } // namespace operators
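Note on RowWiseAddKernel above: instead of materializing a (rows, cols) copy of the bias, the kernel flattens input and bias to 1-D; broadcast(rest_size) tiles the bias once per row, so a single elementwise add applies it row-wise. A self-contained sketch of the same trick, assuming Eigen's unsupported Tensor module and row-major data (values and names below are illustrative, not from the patch):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  const int rows = 2, cols = 3;
  Eigen::Tensor<float, 2, Eigen::RowMajor> input(rows, cols);
  input.setConstant(1.0f);
  Eigen::Tensor<float, 1, Eigen::RowMajor> bias(cols);
  bias.setValues({10.0f, 20.0f, 30.0f});

  const int rest_size = rows;  // input.size() / bias_size
  Eigen::DSizes<Eigen::DenseIndex, 1> one_d(rows * cols);
  Eigen::DSizes<Eigen::DenseIndex, 1> bcast(rest_size);

  // broadcast() tiles the bias: [10,20,30,10,20,30]; the flattened
  // row-major input lines each row up with one copy of the bias.
  Eigen::Tensor<float, 1, Eigen::RowMajor> out =
      input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d);

  for (int i = 0; i < out.size(); ++i) std::cout << out(i) << " ";
  std::cout << "\n";  // 11 21 31 11 21 31
  return 0;
}

The flattening trick only lines up if rows are contiguous in memory; with column-major data the tiled bias would no longer match row boundaries.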
diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc
index 4ca7be359e..42795adbdc 100644
--- a/paddle/operators/softmax_op.cc
+++ b/paddle/operators/softmax_op.cc
@@ -23,6 +23,8 @@ protected:
       const std::vector<const framework::Tensor *> &inputs,
       const std::vector<framework::Tensor *> &outputs) const override {
     PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
+    PADDLE_ENFORCE(inputs[0]->dims().size() == 2,
+                   "The input of softmax op must be matrix");
     PADDLE_ENFORCE(outputs.size() == 1,
                    "Only one output is need for softmax");
     outputs[0]->set_dims(inputs[0]->dims());
@@ -46,4 +48,5 @@ public:
 namespace ops = paddle::operators;
 
 REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
-REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<paddle::platform::CPUPlace>);
+REGISTER_OP_CPU_KERNEL(softmax,
+                       ops::SoftmaxKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu
index 903eef1b62..730c76a04b 100644
--- a/paddle/operators/softmax_op.cu
+++ b/paddle/operators/softmax_op.cu
@@ -2,4 +2,4 @@
 #include <paddle/framework/op_registry.h>
 
 REGISTER_OP_GPU_KERNEL(
-    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace>);
+    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h
index 74e9e2786b..34a6c299bb 100644
--- a/paddle/operators/softmax_op.h
+++ b/paddle/operators/softmax_op.h
@@ -20,11 +20,39 @@
 namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class SoftmaxKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Softmax kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto input = context.Input(0)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    auto logits = input.matrix<T>();
+    auto softmax = output->matrix<T>();
+
+    const int kBatchDim = 0;
+    const int kClassDim = 1;
+
+    const int batch_size = logits.dimension(kBatchDim);
+    const int num_classes = logits.dimension(kClassDim);
+
+    Eigen::DSizes<int, 1> along_class(kClassDim);
+    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
+
+    auto shifted_logits = (logits - logits.maximum(along_class)
+                                        .eval()
+                                        .reshape(batch_by_one)
+                                        .broadcast(one_by_class));
+
+    softmax.device(*(context.GetEigenDevice<Place>())) = shifted_logits.exp();
+
+    softmax.device(*(context.GetEigenDevice<Place>())) =
+        (softmax * softmax.sum(along_class)
+                       .inverse()
+                       .eval()
+                       .reshape(batch_by_one)
+                       .broadcast(one_by_class));
   }
 };
 } // namespace operators
--
GitLab
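Note on SoftmaxKernel above: subtracting each row's maximum before exponentiating is the standard overflow guard; since exp(x - m) / sum_j exp(x_j - m) equals exp(x) / sum_j exp(x_j), the result is unchanged while every exponent stays at or below zero. A self-contained sketch of the same shift-exp-normalize sequence, assuming Eigen's unsupported Tensor module (values below are illustrative, not from the patch):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  const int batch_size = 2, num_classes = 3;
  Eigen::Tensor<float, 2> logits(batch_size, num_classes);
  // The second row would overflow a naive exp(): exp(1002) is inf in float.
  logits.setValues({{1.0f, 2.0f, 3.0f}, {1000.0f, 1001.0f, 1002.0f}});

  Eigen::DSizes<int, 1> along_class(1);
  Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
  Eigen::DSizes<int, 2> one_by_class(1, num_classes);

  // Row maxima, broadcast back to the logits' shape, then shift and exp.
  Eigen::Tensor<float, 2> softmax = (logits - logits.maximum(along_class)
                                                  .eval()
                                                  .reshape(batch_by_one)
                                                  .broadcast(one_by_class))
                                        .exp();
  // Normalize each row by its sum, as the kernel's second assignment does.
  Eigen::Tensor<float, 2> result =
      softmax * softmax.sum(along_class)
                    .inverse()
                    .eval()
                    .reshape(batch_by_one)
                    .broadcast(one_by_class);

  // Both rows yield the same distribution, roughly [0.09, 0.245, 0.665].
  std::cout << result(0, 0) << " " << result(1, 2) << "\n";
  return 0;
}

The kernel writes the shifted exponentials into the output buffer first and then normalizes in place, so no extra temporary tensor is needed on either device.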