diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc
index 41d044cdb72b5fb2a7f8654e8ad103778e0857d1..260c8064ac3c996940a0216b21fb473e976af8e2 100644
--- a/paddle/operators/add_op.cc
+++ b/paddle/operators/add_op.cc
@@ -53,6 +53,5 @@ The equation is: Out = X + Y
 }  // namespace paddle
 
 REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker);
-typedef paddle::operators::AddKernel<::paddle::platform::CPUPlace, float>
-    AddKernel_CPU_float;
-REGISTER_OP_CPU_KERNEL(add_two, AddKernel_CPU_float);
+REGISTER_OP_CPU_KERNEL(
+    add_two, paddle::operators::AddKernel<::paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu
index 0edf142ee4e5f359ea14be02dbf3f7f8855f6db1..2e5a755f92e4d1fa487152ed453fe3b2823062ed 100644
--- a/paddle/operators/add_op.cu
+++ b/paddle/operators/add_op.cu
@@ -1,6 +1,5 @@
 #include "paddle/operators/add_op.h"
 #include "paddle/framework/op_registry.h"
 
-typedef paddle::operators::AddKernel<::paddle::platform::GPUPlace, float> AddKernel_GPU_float;
 REGISTER_OP_GPU_KERNEL(add_two,
-                       AddKernel_GPU_float);
\ No newline at end of file
+                       paddle::operators::AddKernel<::paddle::platform::GPUPlace, float>);
\ No newline at end of file
diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc
index 713b2a5dc83d8dd5a3d944101591d75cb19fe04f..7aa63961a0f49aad7dc88360f1d466b401ca83a2 100644
--- a/paddle/operators/mul_op.cc
+++ b/paddle/operators/mul_op.cc
@@ -57,4 +57,4 @@ The equation is: Out = X * Y
 
 REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker);
 REGISTER_OP_CPU_KERNEL(
-    mul, paddle::operators::MulKernel<paddle::platform::CPUPlace>);
+    mul, paddle::operators::MulKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/mul_op.cu b/paddle/operators/mul_op.cu
index 201723df247993c5cc1650edbe4f74441e3217d4..75f00e746c22968815a450dfc1fc8982ebe677c0 100644
--- a/paddle/operators/mul_op.cu
+++ b/paddle/operators/mul_op.cu
@@ -17,4 +17,4 @@
 #include "paddle/framework/op_registry.h"
 
 REGISTER_OP_GPU_KERNEL(mul,
-                       paddle::operators::MulKernel<paddle::platform::GPUPlace>);
\ No newline at end of file
+                       paddle::operators::MulKernel<paddle::platform
+                                                        ::GPUPlace, float>);
\ No newline at end of file
diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h
index ce8a0169e0cbaafb7e90d2227c9597fff463883d..13e5b6a95016a4c3c9be575861b13f131c61d983 100644
--- a/paddle/operators/mul_op.h
+++ b/paddle/operators/mul_op.h
@@ -20,11 +20,22 @@ namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class MulKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Mul kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
+    dim_pair[0].first = 1;
+    dim_pair[0].second = 0;
+
+    auto input0 = context.Input(0)->Get<framework::Tensor>();
+    auto input1 = context.Input(1)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    output->mutable_data<T>(context.GetPlace());
+
+    output->matrix<T>().device(*(context.GetEigenDevice<Place>())) =
+        input0.matrix<T>().contract(input1.matrix<T>(), dim_pair);
   }
 };
 }  // namespace operators
 
diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc
index 414bafd0468033813d50d4d6723e68ee9347eaac..567b058fd07a74014cac728e7d10f6f2f272ef38 100644
--- a/paddle/operators/rowwise_add_op.cc
+++ b/paddle/operators/rowwise_add_op.cc
@@ -58,4 +58,4 @@ REGISTER_OP(rowwise_add,
             paddle::operators::RowWiseAddOpMaker);
 REGISTER_OP_CPU_KERNEL(
     rowwise_add,
-    paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace>);
+    paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu
index 2c4bfbf93a1064a47a19c991fa6655b5d67e83cb..58fe96a4a35314909b99a6c2f0442420c6243fc2 100644
--- a/paddle/operators/rowwise_add_op.cu
+++ b/paddle/operators/rowwise_add_op.cu
@@ -3,4 +3,4 @@
 
 REGISTER_OP_GPU_KERNEL(
     rowwise_add,
-    paddle::operators::RowWiseAddKernel<paddle::platform::GPUPlace>);
+    paddle::operators::RowWiseAddKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h
index 35f43e6376be6239021e7a9bacb849b93d5226b5..f1d43002dc56ff36a7145795f721422d97d4fdde 100644
--- a/paddle/operators/rowwise_add_op.h
+++ b/paddle/operators/rowwise_add_op.h
@@ -19,11 +19,24 @@ namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class RowWiseAddKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto in0 = context.Input(0)->Get<framework::Tensor>();
+    auto in1 = context.Input(1)->Get<framework::Tensor>();
+    auto* out = context.Output(0)->GetMutable<framework::Tensor>();
+
+    auto input = in0.matrix<T>();
+    auto bias = in1.vec<T>();
+    auto output = out->matrix<T>();
+
+    const int bias_size = bias.dimension(0);
+    const int rest_size = input.size() / bias_size;
+    Eigen::DSizes<int, 1> one_d(input.size());
+    Eigen::DSizes<int, 1> bcast(rest_size);
+    output.reshape(one_d).device(*(context.GetEigenDevice<Place>())) =
+        input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d);
   }
 };
 
diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc
index 45ae277c538ca90716febaf2f3d92b560149d147..fa13f2c4f74c583aff56f907178c77b4acf5d37e 100644
--- a/paddle/operators/sigmoid_op.cc
+++ b/paddle/operators/sigmoid_op.cc
@@ -46,4 +46,5 @@ REGISTER_OP(sigmoid,
             paddle::operators::SigmoidOp,
             paddle::operators::SigmoidOpMaker);
 REGISTER_OP_CPU_KERNEL(
-    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::CPUPlace>);
+    sigmoid,
+    paddle::operators::SigmoidKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/sigmoid_op.cu b/paddle/operators/sigmoid_op.cu
index 79d5222348f610b1b016a2df06e8b1e0a4fac66c..59bba2729f7efb31f633dc8448e8b8db51ede49a 100644
--- a/paddle/operators/sigmoid_op.cu
+++ b/paddle/operators/sigmoid_op.cu
@@ -2,4 +2,4 @@
 #include <paddle/framework/op_registry.h>
 
 REGISTER_OP_GPU_KERNEL(
-    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace>);
+    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h
index 42173343f3e364729ecd190fc554b8c45ecfca8d..7995b75297c7b040f5a000e2d39918ce3b5bd4a0 100644
--- a/paddle/operators/sigmoid_op.h
+++ b/paddle/operators/sigmoid_op.h
@@ -20,11 +20,17 @@ namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class SigmoidKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto input = context.Input(0)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    output->mutable_data<T>(context.GetPlace());
+
+    output->flat<T>().device(*(context.GetEigenDevice<Place>())) =
+        1.0 / (1.0 + (-1.0 * input.flat<T>()).exp());
   }
 };
 }  // namespace operators
 
diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc
index 4ca7be359e210d7a31aef94e498f37a1ad4879a2..42795adbdc83658558d116ba87ffc6bf34a6fc8e 100644
--- a/paddle/operators/softmax_op.cc
+++ b/paddle/operators/softmax_op.cc
@@ -23,6 +23,8 @@ protected:
       const std::vector<const framework::Tensor *> &inputs,
       const std::vector<framework::Tensor *> &outputs) const override {
     PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
+    PADDLE_ENFORCE(inputs[0]->dims().size() == 2,
+                   "The input of softmax op must be matrix");
     PADDLE_ENFORCE(outputs.size() == 1, "Only one output is need for softmax");
 
     outputs[0]->set_dims(inputs[0]->dims());
@@ -46,4 +48,5 @@ public:
 
 namespace ops = paddle::operators;
 REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
-REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<paddle::platform::CPUPlace>);
+REGISTER_OP_CPU_KERNEL(softmax,
+                       ops::SoftmaxKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu
index 903eef1b62231d65e2f9ec7a1f57fca0f4c4605c..730c76a04b58636069431c193bd03584411f2b4a 100644
--- a/paddle/operators/softmax_op.cu
+++ b/paddle/operators/softmax_op.cu
@@ -2,4 +2,4 @@
 #include <paddle/framework/op_registry.h>
 
 REGISTER_OP_GPU_KERNEL(
-    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace>);
+    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h
index 74e9e2786b11b9a87cd9700d8458d4e611a8d4bb..34a6c299bbff4da63ff179ae093c0c6866436d06 100644
--- a/paddle/operators/softmax_op.h
+++ b/paddle/operators/softmax_op.h
@@ -20,11 +20,39 @@ namespace paddle {
 namespace operators {
 
-template <typename Place>
+template <typename Place, typename T>
 class SoftmaxKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Softmax kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto input = context.Input(0)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+
+    auto logits = input.matrix<T>();
+    auto softmax = output->matrix<T>();
+
+    const int kBatchDim = 0;
+    const int kClassDim = 1;
+
+    const int batch_size = logits.dimension(kBatchDim);
+    const int num_classes = logits.dimension(kClassDim);
+
+    Eigen::DSizes<int, 1> along_class(kClassDim);
+    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
+
+    auto shifted_logits = (logits - logits.maximum(along_class)
+                                        .eval()
+                                        .reshape(batch_by_one)
+                                        .broadcast(one_by_class));
+
+    softmax.device(*(context.GetEigenDevice<Place>())) = shifted_logits.exp();
+
+    softmax.device(*(context.GetEigenDevice<Place>())) =
+        (softmax * softmax.sum(along_class)
+                       .inverse()
+                       .eval()
+                       .reshape(batch_by_one)
+                       .broadcast(one_by_class));
   }
 };
 }  // namespace operators