From b6c075527c9810457cb5ca1c5d04ba34a8c5e2a2 Mon Sep 17 00:00:00 2001
From: qijun
Date: Tue, 18 Jul 2017 20:14:57 +0800
Subject: [PATCH] implement some basic OpKernel

---
 paddle/operators/add_op.cc         |  5 ++---
 paddle/operators/add_op.cu         |  3 +--
 paddle/operators/mul_op.cc         |  2 +-
 paddle/operators/mul_op.cu         |  2 +-
 paddle/operators/mul_op.h          | 17 ++++++++++++---
 paddle/operators/rowwise_add_op.cc |  2 +-
 paddle/operators/rowwise_add_op.cu |  2 +-
 paddle/operators/rowwise_add_op.h  | 19 ++++++++++++++---
 paddle/operators/sigmoid_op.cc     |  3 ++-
 paddle/operators/sigmoid_op.cu     |  2 +-
 paddle/operators/sigmoid_op.h      | 12 ++++++++---
 paddle/operators/softmax_op.cc     |  5 ++++-
 paddle/operators/softmax_op.cu     |  2 +-
 paddle/operators/softmax_op.h      | 34 +++++++++++++++++++++++++++---
 14 files changed, 85 insertions(+), 25 deletions(-)

diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc
index 41d044cdb7..260c8064ac 100644
--- a/paddle/operators/add_op.cc
+++ b/paddle/operators/add_op.cc
@@ -53,6 +53,5 @@ The equation is: Out = X + Y
 } // namespace paddle
 
 REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker);
-typedef paddle::operators::AddKernel<::paddle::platform::CPUPlace, float>
-    AddKernel_CPU_float;
-REGISTER_OP_CPU_KERNEL(add_two, AddKernel_CPU_float);
+REGISTER_OP_CPU_KERNEL(
+    add_two, paddle::operators::AddKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu
index 0edf142ee4..2e5a755f92 100644
--- a/paddle/operators/add_op.cu
+++ b/paddle/operators/add_op.cu
@@ -1,6 +1,5 @@
 #include "paddle/operators/add_op.h"
 #include "paddle/framework/op_registry.h"
 
-typedef paddle::operators::AddKernel<::paddle::platform::GPUPlace, float> AddKernel_GPU_float;
 REGISTER_OP_GPU_KERNEL(add_two,
-                       AddKernel_GPU_float);
\ No newline at end of file
+                       paddle::operators::AddKernel<paddle::platform::GPUPlace, float>);
\ No newline at end of file
diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc
index 713b2a5dc8..7aa63961a0 100644
--- a/paddle/operators/mul_op.cc
+++ b/paddle/operators/mul_op.cc
@@ -57,4 +57,4 @@ The equation is: Out = X * Y
 REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker);
 
 REGISTER_OP_CPU_KERNEL(
-    mul, paddle::operators::MulKernel<paddle::platform::CPUPlace>);
+    mul, paddle::operators::MulKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu
index 201723df24..75f00e746c 100644
--- a/paddle/operators/mul_op.cu
+++ b/paddle/operators/mul_op.cu
@@ -17,4 +17,4 @@
 
 REGISTER_OP_GPU_KERNEL(mul, paddle::operators::MulKernel<paddle::platform
-                                                              ::GPUPlace>);
\ No newline at end of file
+                                                              ::GPUPlace, float>);
\ No newline at end of file
diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h
index ce8a0169e0..13e5b6a950 100644
--- a/paddle/operators/mul_op.h
+++ b/paddle/operators/mul_op.h
@@ -20,11 +20,22 @@
 namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class MulKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Mul kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
+    dim_pair[0].first = 1;
+    dim_pair[0].second = 0;
+
+    auto input0 = context.Input(0)->Get<framework::Tensor>();
+    auto input1 = context.Input(1)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    output->mutable_data<T>(context.GetPlace());
+
+    output->matrix<T>().device(*(context.GetEigenDevice<Place>())) =
+        input0.matrix<T>().contract(input1.matrix<T>(), dim_pair);
   }
 };
 } // namespace operators
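Note on MulKernel above: Eigen expresses the matrix product as a tensor contraction; IndexPair(1, 0) contracts dimension 1 of the left operand with dimension 0 of the right, which is exactly Out = X * Y. A minimal self-contained sketch of the same contraction, assuming only Eigen's unsupported Tensor module (the names and sizes below are illustrative, not from the patch):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> a(2, 3), b(3, 4);
  a.setRandom();
  b.setRandom();

  // Contract dim 1 of `a` with dim 0 of `b`: a plain matrix multiply.
  Eigen::array<Eigen::IndexPair<int>, 1> dim_pair = {Eigen::IndexPair<int>(1, 0)};
  Eigen::Tensor<float, 2> c = a.contract(b, dim_pair);

  std::cout << c.dimension(0) << " x " << c.dimension(1) << "\n";  // 2 x 4
  return 0;
}

Writing the product as a device()-targeted Eigen expression is what lets one kernel body serve both places; only the Place template parameter changes between the CPU and GPU registrations.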
diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc
index 414bafd046..567b058fd0 100644
--- a/paddle/operators/rowwise_add_op.cc
+++ b/paddle/operators/rowwise_add_op.cc
@@ -58,4 +58,4 @@ REGISTER_OP(rowwise_add,
             paddle::operators::RowWiseAddOpMaker);
 REGISTER_OP_CPU_KERNEL(
     rowwise_add,
-    paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace>);
+    paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu
index 2c4bfbf93a..58fe96a4a3 100644
--- a/paddle/operators/rowwise_add_op.cu
+++ b/paddle/operators/rowwise_add_op.cu
@@ -3,4 +3,4 @@
 
 REGISTER_OP_GPU_KERNEL(
     rowwise_add,
-    paddle::operators::RowWiseAddKernel<paddle::platform::GPUPlace>);
+    paddle::operators::RowWiseAddKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h
index 35f43e6376..f1d43002dc 100644
--- a/paddle/operators/rowwise_add_op.h
+++ b/paddle/operators/rowwise_add_op.h
@@ -19,11 +19,24 @@
 namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class RowWiseAddKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto in0 = context.Input(0)->Get<framework::Tensor>();
+    auto in1 = context.Input(1)->Get<framework::Tensor>();
+    auto* out = context.Output(0)->GetMutable<framework::Tensor>();
+
+    auto input = in0.matrix<T>();
+    auto bias = in1.vec<T>();
+    auto output = out->matrix<T>();
+
+    const int bias_size = bias.dimension(0);
+    const int rest_size = input.size() / bias_size;
+    Eigen::DSizes<int, 1> one_d(input.size());
+    Eigen::DSizes<int, 1> bcast(rest_size);
+    output.reshape(one_d).device(*(context.GetEigenDevice<Place>())) =
+        input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d);
   }
 };
 
diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc
index 45ae277c53..fa13f2c4f7 100644
--- a/paddle/operators/sigmoid_op.cc
+++ b/paddle/operators/sigmoid_op.cc
@@ -46,4 +46,5 @@
 REGISTER_OP(sigmoid, paddle::operators::SigmoidOp,
             paddle::operators::SigmoidOpMaker);
 REGISTER_OP_CPU_KERNEL(
-    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::CPUPlace>);
+    sigmoid,
+    paddle::operators::SigmoidKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu
index 79d5222348..59bba2729f 100644
--- a/paddle/operators/sigmoid_op.cu
+++ b/paddle/operators/sigmoid_op.cu
@@ -2,4 +2,4 @@
 #include <paddle/framework/op_registry.h>
 
 REGISTER_OP_GPU_KERNEL(
-    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace>);
+    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h
index 42173343f3..7995b75297 100644
--- a/paddle/operators/sigmoid_op.h
+++ b/paddle/operators/sigmoid_op.h
@@ -20,11 +20,17 @@
 namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class SigmoidKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto input = context.Input(0)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    output->mutable_data<T>(context.GetPlace());
+
+    output->flat<T>().device(*(context.GetEigenDevice<Place>())) =
+        1.0 / (1.0 + (-1.0 * input.flat<T>()).exp());
   }
 };
 } // namespace operators
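Note on RowWiseAddKernel above: instead of materializing a (rows, cols) copy of the bias, the kernel flattens input and bias to 1-D; broadcast(rest_size) tiles the bias once per row, so a single elementwise add applies it row-wise. A self-contained sketch of the same trick, assuming Eigen's unsupported Tensor module and row-major data (values and names below are illustrative, not from the patch):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  const int rows = 2, cols = 3;
  Eigen::Tensor<float, 2, Eigen::RowMajor> input(rows, cols);
  input.setConstant(1.0f);
  Eigen::Tensor<float, 1, Eigen::RowMajor> bias(cols);
  bias.setValues({10.0f, 20.0f, 30.0f});

  const int rest_size = rows;  // input.size() / bias_size
  Eigen::DSizes<Eigen::DenseIndex, 1> one_d(rows * cols);
  Eigen::DSizes<Eigen::DenseIndex, 1> bcast(rest_size);

  // broadcast() tiles the bias: [10,20,30,10,20,30]; the flattened
  // row-major input lines each row up with one copy of the bias.
  Eigen::Tensor<float, 1, Eigen::RowMajor> out =
      input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d);

  for (int i = 0; i < out.size(); ++i) std::cout << out(i) << " ";
  std::cout << "\n";  // 11 21 31 11 21 31
  return 0;
}

The flattening trick only lines up if rows are contiguous in memory; with column-major data the tiled bias would no longer match row boundaries.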
diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc
index 4ca7be359e..42795adbdc 100644
--- a/paddle/operators/softmax_op.cc
+++ b/paddle/operators/softmax_op.cc
@@ -23,6 +23,8 @@ protected:
       const std::vector<const framework::Tensor *> &inputs,
       const std::vector<framework::Tensor *> &outputs) const override {
     PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
+    PADDLE_ENFORCE(inputs[0]->dims().size() == 2,
+                   "The input of softmax op must be matrix");
     PADDLE_ENFORCE(outputs.size() == 1,
                    "Only one output is need for softmax");
     outputs[0]->set_dims(inputs[0]->dims());
@@ -46,4 +48,5 @@ public:
 namespace ops = paddle::operators;
 
 REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
-REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<paddle::platform::CPUPlace>);
+REGISTER_OP_CPU_KERNEL(softmax,
+                       ops::SoftmaxKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu
index 903eef1b62..730c76a04b 100644
--- a/paddle/operators/softmax_op.cu
+++ b/paddle/operators/softmax_op.cu
@@ -2,4 +2,4 @@
 #include <paddle/framework/op_registry.h>
 
 REGISTER_OP_GPU_KERNEL(
-    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace>);
+    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h
index 74e9e2786b..34a6c299bb 100644
--- a/paddle/operators/softmax_op.h
+++ b/paddle/operators/softmax_op.h
@@ -20,11 +20,39 @@
 namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class SoftmaxKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Softmax kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto input = context.Input(0)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    auto logits = input.matrix<T>();
+    auto softmax = output->matrix<T>();
+
+    const int kBatchDim = 0;
+    const int kClassDim = 1;
+
+    const int batch_size = logits.dimension(kBatchDim);
+    const int num_classes = logits.dimension(kClassDim);
+
+    Eigen::DSizes<int, 1> along_class(kClassDim);
+    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
+
+    auto shifted_logits = (logits - logits.maximum(along_class)
+                                        .eval()
+                                        .reshape(batch_by_one)
+                                        .broadcast(one_by_class));
+
+    softmax.device(*(context.GetEigenDevice<Place>())) = shifted_logits.exp();
+
+    softmax.device(*(context.GetEigenDevice<Place>())) =
+        (softmax * softmax.sum(along_class)
+                       .inverse()
+                       .eval()
+                       .reshape(batch_by_one)
+                       .broadcast(one_by_class));
   }
 };
 } // namespace operators
--
GitLab
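Note on SoftmaxKernel above: subtracting each row's maximum before exponentiating is the standard overflow guard; since exp(x - m) / sum_j exp(x_j - m) equals exp(x) / sum_j exp(x_j), the result is unchanged while every exponent stays at or below zero. A self-contained sketch of the same shift-exp-normalize sequence, assuming Eigen's unsupported Tensor module (values below are illustrative, not from the patch):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  const int batch_size = 2, num_classes = 3;
  Eigen::Tensor<float, 2> logits(batch_size, num_classes);
  // The second row would overflow a naive exp(): exp(1002) is inf in float.
  logits.setValues({{1.0f, 2.0f, 3.0f}, {1000.0f, 1001.0f, 1002.0f}});

  Eigen::DSizes<int, 1> along_class(1);
  Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
  Eigen::DSizes<int, 2> one_by_class(1, num_classes);

  // Row maxima, broadcast back to the logits' shape, then shift and exp.
  Eigen::Tensor<float, 2> softmax = (logits - logits.maximum(along_class)
                                                  .eval()
                                                  .reshape(batch_by_one)
                                                  .broadcast(one_by_class))
                                        .exp();
  // Normalize each row by its sum, as the kernel's second assignment does.
  Eigen::Tensor<float, 2> result =
      softmax * softmax.sum(along_class)
                    .inverse()
                    .eval()
                    .reshape(batch_by_one)
                    .broadcast(one_by_class);

  // Both rows yield the same distribution, roughly [0.09, 0.245, 0.665].
  std::cout << result(0, 0) << " " << result(1, 2) << "\n";
  return 0;
}

The kernel writes the shifted exponentials into the output buffer first and then normalizes in place, so no extra temporary tensor is needed on either device.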