implement some basic OpKernel

b6c07552 · qijun · 3208914b · b6c07552 · b6c07552 · b6c07552
14 changed file
--- a/paddle/operators/add_op.cc
+++ b/paddle/operators/add_op.cc
@@ -53,6 +53,5 @@ The equation is: Out = X + Y
 }  // namespace paddle

 REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker);
-typedef paddle::operators::AddKernel<::paddle::platform::CPUPlace, float>
-    AddKernel_CPU_float;
-REGISTER_OP_CPU_KERNEL(add_two, AddKernel_CPU_float);
+REGISTER_OP_CPU_KERNEL(
+    add_two, paddle::operators::AddKernel<paddle::platform::CPUPlace, float>);
--- a/paddle/operators/add_op.cu
+++ b/paddle/operators/add_op.cu
 #include "paddle/operators/add_op.h"
 #include "paddle/framework/op_registry.h"

-typedef paddle::operators::AddKernel<::paddle::platform::GPUPlace, float> AddKernel_GPU_float;
 REGISTER_OP_GPU_KERNEL(add_two,
-                       AddKernel_GPU_float);
\ No newline at end of file
+                       paddle::operators::AddKernel<paddle::platform::GPUPlace, float>);
\ No newline at end of file
--- a/paddle/operators/mul_op.cc
+++ b/paddle/operators/mul_op.cc
@@ -57,4 +57,4 @@ The equation is: Out = X * Y

 REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker);
 REGISTER_OP_CPU_KERNEL(
-    mul, paddle::operators::MulKernel<paddle::platform::CPUPlace>);
+    mul, paddle::operators::MulKernel<paddle::platform::CPUPlace, float>);
--- a/paddle/operators/mul_op.cu
+++ b/paddle/operators/mul_op.cu
@@ -17,4 +17,4 @@

 REGISTER_OP_GPU_KERNEL(mul,
                       paddle::operators::MulKernel<paddle::platform
-                       ::GPUPlace>);
\ No newline at end of file
+                       ::GPUPlace, float>);
\ No newline at end of file
--- a/paddle/operators/mul_op.h
+++ b/paddle/operators/mul_op.h
@@ -20,11 +20,22 @@
 namespace paddle {
 namespace operators {

-template <typename Place>
+template <typename Place, typename T>
 class MulKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Mul kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
+    dim_pair[0].first = 1;
+    dim_pair[0].second = 0;
+
+    auto input0 = context.Input(0)->Get<framework::Tensor>();
+    auto input1 = context.Input(1)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    output->mutable_data<T>(context.GetPlace());
+
+    output->matrix<T>().device(*(context.GetEigenDevice<Place>())) =
+        input0.matrix<T>().contract(input1.matrix<T>(), dim_pair);
  }
 };
 }  // namespace operators

--- a/paddle/operators/rowwise_add_op.cc
+++ b/paddle/operators/rowwise_add_op.cc
@@ -58,4 +58,4 @@ REGISTER_OP(rowwise_add,
            paddle::operators::RowWiseAddOpMaker);
 REGISTER_OP_CPU_KERNEL(
    rowwise_add,
-    paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace>);
+    paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace, float>);
--- a/paddle/operators/rowwise_add_op.cu
+++ b/paddle/operators/rowwise_add_op.cu
@@ -3,4 +3,4 @@

 REGISTER_OP_GPU_KERNEL(
    rowwise_add,
-    paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace>);
+    paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace, float>);
--- a/paddle/operators/rowwise_add_op.h
+++ b/paddle/operators/rowwise_add_op.h
@@ -19,11 +19,24 @@
 namespace paddle {
 namespace operators {

-template <typename Place>
+template <typename Place, typename T>
 class RowWiseAddKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto in0 = context.Input(0)->Get<framework::Tensor>();
+    auto in1 = context.Input(1)->Get<framework::Tensor>();
+    auto* out = context.Output(0)->GetMutable<framework::Tensor>();
+
+    auto input = in0.matrix<T>();
+    auto bias = in1.vec<T>();
+    auto output = out->matrix<T>();
+
+    const int bias_size = bias.dimension(0);
+    const int rest_size = input.size() / bias_size;
+    Eigen::DSizes<int, 1> one_d(input.size());
+    Eigen::DSizes<int, 1> bcast(rest_size);
+    output.reshape(one_d).device(*(context.GetEigenDevice<Place>())) =
+        input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d);
  }
 };


--- a/paddle/operators/sigmoid_op.cc
+++ b/paddle/operators/sigmoid_op.cc
@@ -46,4 +46,5 @@ REGISTER_OP(sigmoid,
            paddle::operators::SigmoidOp,
            paddle::operators::SigmoidOpMaker);
 REGISTER_OP_CPU_KERNEL(
-    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::CPUPlace>);
+    sigmoid,
+    paddle::operators::SigmoidKernel<paddle::platform::CPUPlace, float>);
--- a/paddle/operators/sigmoid_op.cu
+++ b/paddle/operators/sigmoid_op.cu
@@ -2,4 +2,4 @@
 #include <paddle/framework/op_registry.h>

 REGISTER_OP_GPU_KERNEL(
-    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace>);
+    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace, float>);
--- a/paddle/operators/sigmoid_op.h
+++ b/paddle/operators/sigmoid_op.h
@@ -20,11 +20,17 @@
 namespace paddle {
 namespace operators {

-template <typename Place>
+template <typename Place, typename T>
 class SigmoidKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto input = context.Input(0)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    output->mutable_data<T>(context.GetPlace());
+
+    output->flat<T>().device(*(context.GetEigenDevice<Place>())) =
+        1.0 / (1.0 + (-1.0 * input.flat<T>()).exp());
  }
 };
 }  // namespace operators

--- a/paddle/operators/softmax_op.cc
+++ b/paddle/operators/softmax_op.cc
@@ -23,6 +23,8 @@ protected:
      const std::vector<const framework::Tensor *> &inputs,
      const std::vector<framework::Tensor *> &outputs) const override {
    PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
+    PADDLE_ENFORCE(inputs[0]->dims().size() == 2,
+                   "The input of softmax op must be matrix");
    PADDLE_ENFORCE(outputs.size() == 1, "Only one output is need for softmax");

    outputs[0]->set_dims(inputs[0]->dims());
@@ -46,4 +48,5 @@ public:
 namespace ops = paddle::operators;

 REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
-REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<paddle::platform::CPUPlace>);
+REGISTER_OP_CPU_KERNEL(softmax,
+                       ops::SoftmaxKernel<paddle::platform::CPUPlace, float>);
--- a/paddle/operators/softmax_op.cu
+++ b/paddle/operators/softmax_op.cu
@@ -2,4 +2,4 @@
 #include <paddle/operators/softmax_op.h>

 REGISTER_OP_GPU_KERNEL(
-    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace>);
+    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace, float>);
--- a/paddle/operators/softmax_op.h
+++ b/paddle/operators/softmax_op.h
@@ -20,11 +20,39 @@
 namespace paddle {
 namespace operators {

-template <typename Place>
+template <typename Place, typename T>
 class SoftmaxKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Softmax kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto input = context.Input(0)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    auto logits = input.matrix<T>();
+    auto softmax = output->matrix<T>();
+
+    const int kBatchDim = 0;
+    const int kClassDim = 1;
+
+    const int batch_size = logits.dimension(kBatchDim);
+    const int num_classes = logits.dimension(kClassDim);
+
+    Eigen::DSizes<int, 1> along_class(kClassDim);
+    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
+
+    auto shifted_logits = (logits - logits.maximum(along_class)
+                                        .eval()
+                                        .reshape(batch_by_one)
+                                        .broadcast(one_by_class));
+
+    softmax.device(*(context.GetEigenDevice<Place>())) = shifted_logits.exp();
+
+    softmax.device(*(context.GetEigenDevice<Place>())) =
+        (softmax * softmax.sum(along_class)
+                       .inverse()
+                       .eval()
+                       .reshape(batch_by_one)
+                       .broadcast(one_by_class));
  }
 };
 }  // namespace operators