Commit b6c07552, authored by qijun

implement some basic OpKernel

Parent: 3208914b
paddle/operators/add_op.cc:

```diff
@@ -53,6 +53,5 @@ The equation is: Out = X + Y
 }  // namespace paddle
 REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker);
-typedef paddle::operators::AddKernel<::paddle::platform::CPUPlace, float>
-    AddKernel_CPU_float;
-REGISTER_OP_CPU_KERNEL(add_two, AddKernel_CPU_float);
+REGISTER_OP_CPU_KERNEL(
+    add_two, paddle::operators::AddKernel<paddle::platform::CPUPlace, float>);
```
#include "paddle/operators/add_op.h" #include "paddle/operators/add_op.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
typedef paddle::operators::AddKernel<::paddle::platform::GPUPlace, float> AddKernel_GPU_float;
REGISTER_OP_GPU_KERNEL(add_two, REGISTER_OP_GPU_KERNEL(add_two,
AddKernel_GPU_float); paddle::operators::AddKernel<paddle::platform::GPUPlace, float>);
\ No newline at end of file \ No newline at end of file
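The registration change in both files works because `AddKernel`, like every kernel touched by this commit, is now templated on the device `Place` and the element type `T`, so the macro can name the full instantiation inline instead of routing through a `typedef`. As a reading aid, here is a minimal sketch of the shape `add_op.h` plausibly takes; the header is not part of this diff, so the body below is an assumption modeled on the other kernels:

```cpp
// Hypothetical AddKernel, mirroring MulKernel/SigmoidKernel in this commit.
// add_op.h is not shown in the diff; details may differ.
template <typename Place, typename T>
class AddKernel : public framework::OpKernel {
public:
  void Compute(const framework::KernelContext& context) const override {
    auto input0 = context.Input(0)->Get<framework::Tensor>();
    auto input1 = context.Input(1)->Get<framework::Tensor>();
    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
    output->mutable_data<T>(context.GetPlace());

    // Elementwise Out = X + Y, evaluated on the device selected by Place.
    output->flat<T>().device(*(context.GetEigenDevice<Place>())) =
        input0.flat<T>() + input1.flat<T>();
  }
};
```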
paddle/operators/mul_op.cc:

```diff
@@ -57,4 +57,4 @@ The equation is: Out = X * Y
 REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker);
 REGISTER_OP_CPU_KERNEL(
-    mul, paddle::operators::MulKernel<paddle::platform::CPUPlace>);
+    mul, paddle::operators::MulKernel<paddle::platform::CPUPlace, float>);
```
paddle/operators/mul_op.cu:

```diff
@@ -17,4 +17,4 @@
 REGISTER_OP_GPU_KERNEL(mul,
                        paddle::operators::MulKernel<paddle::platform
-                       ::GPUPlace>);
+                       ::GPUPlace, float>);
\ No newline at end of file
```
paddle/operators/mul_op.h:

```diff
@@ -20,11 +20,22 @@
 namespace paddle {
 namespace operators {
-template <typename Place>
+template <typename Place, typename T>
 class MulKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Mul kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
+    dim_pair[0].first = 1;
+    dim_pair[0].second = 0;
+    auto input0 = context.Input(0)->Get<framework::Tensor>();
+    auto input1 = context.Input(1)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+    output->mutable_data<T>(context.GetPlace());
+    output->matrix<T>().device(*(context.GetEigenDevice<Place>())) =
+        input0.matrix<T>().contract(input1.matrix<T>(), dim_pair);
   }
 };
 }  // namespace operators
```
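The `contract` call with the index pair `(1, 0)` is Eigen's general tensor contraction specialized to an ordinary matrix product: dimension 1 of the left operand (its columns) is summed against dimension 0 of the right operand (its rows). A self-contained sketch, independent of the Paddle headers:

```cpp
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> a(2, 3), b(3, 2);
  a.setValues({{1, 2, 3}, {4, 5, 6}});
  b.setValues({{1, 0}, {0, 1}, {1, 1}});

  // Contract dim 1 of `a` with dim 0 of `b`: c(i, j) = sum_k a(i, k) * b(k, j).
  Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
  dim_pair[0] = Eigen::IndexPair<Eigen::DenseIndex>(1, 0);

  Eigen::Tensor<float, 2> c = a.contract(b, dim_pair);
  std::cout << c << "\n";  // the ordinary matrix product: [[4, 5], [10, 11]]
  return 0;
}
```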
paddle/operators/rowwise_add_op.cc:

```diff
@@ -58,4 +58,4 @@ REGISTER_OP(rowwise_add,
             paddle::operators::RowWiseAddOpMaker);
 REGISTER_OP_CPU_KERNEL(
     rowwise_add,
-    paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace>);
+    paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace, float>);
```
paddle/operators/rowwise_add_op.cu:

```diff
@@ -3,4 +3,4 @@
 REGISTER_OP_GPU_KERNEL(
     rowwise_add,
-    paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace>);
+    paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace, float>);
```
paddle/operators/rowwise_add_op.h:

```diff
@@ -19,11 +19,24 @@
 namespace paddle {
 namespace operators {
-template <typename Place>
+template <typename Place, typename T>
 class RowWiseAddKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto in0 = context.Input(0)->Get<framework::Tensor>();
+    auto in1 = context.Input(1)->Get<framework::Tensor>();
+    auto* out = context.Output(0)->GetMutable<framework::Tensor>();
+    auto input = in0.matrix<T>();
+    auto bias = in1.vec<T>();
+    auto output = out->matrix<T>();
+    const int bias_size = bias.dimension(0);
+    const int rest_size = input.size() / bias_size;
+    Eigen::DSizes<int, 1> one_d(input.size());
+    Eigen::DSizes<int, 1> bcast(rest_size);
+    output.reshape(one_d).device(*(context.GetEigenDevice<Place>())) =
+        input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d);
   }
 };
```
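The reshape/broadcast sequence above avoids an explicit per-row loop: the matrix is flattened to 1-D, the bias vector is tiled once per row, and a single elementwise add runs on the selected device. A standalone sketch of the same trick; row-major layout is assumed here (which is what Paddle's Eigen adapters use), since that is what makes each tiled copy of the bias line up with one row:

```cpp
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2, Eigen::RowMajor> input(2, 3);
  input.setValues({{1, 2, 3}, {4, 5, 6}});
  Eigen::Tensor<float, 1, Eigen::RowMajor> bias(3);
  bias.setValues({10, 20, 30});

  const Eigen::DenseIndex rest_size = input.size() / bias.dimension(0);  // rows
  Eigen::DSizes<Eigen::DenseIndex, 1> one_d(input.size());
  Eigen::DSizes<Eigen::DenseIndex, 1> bcast(rest_size);

  // Flat view: [1..6] + [10,20,30,10,20,30], then back to 2x3.
  Eigen::Tensor<float, 2, Eigen::RowMajor> out =
      (input.reshape(one_d) + bias.broadcast(bcast)).reshape(input.dimensions());

  std::cout << out << "\n";  // [[11, 22, 33], [14, 25, 36]]
  return 0;
}
```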
paddle/operators/sigmoid_op.cc:

```diff
@@ -46,4 +46,5 @@ REGISTER_OP(sigmoid,
             paddle::operators::SigmoidOp,
             paddle::operators::SigmoidOpMaker);
 REGISTER_OP_CPU_KERNEL(
-    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::CPUPlace>);
+    sigmoid,
+    paddle::operators::SigmoidKernel<paddle::platform::CPUPlace, float>);
```
paddle/operators/sigmoid_op.cu:

```diff
@@ -2,4 +2,4 @@
 #include <paddle/framework/op_registry.h>
 REGISTER_OP_GPU_KERNEL(
-    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace>);
+    sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace, float>);
```
paddle/operators/sigmoid_op.h:

```diff
@@ -20,11 +20,17 @@
 namespace paddle {
 namespace operators {
-template <typename Place>
+template <typename Place, typename T>
 class SigmoidKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto input = context.Input(0)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+    output->mutable_data<T>(context.GetPlace());
+    output->flat<T>().device(*(context.GetEigenDevice<Place>())) =
+        1.0 / (1.0 + (-1.0 * input.flat<T>()).exp());
   }
 };
 }  // namespace operators
```
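The assigned expression is the logistic function sigmoid(x) = 1 / (1 + exp(-x)) applied elementwise to the flattened tensor. A tiny standalone check of the same arithmetic, written with `.inverse()` (Eigen's elementwise reciprocal, also used by the softmax kernel below) to keep scalars on the tensor side:

```cpp
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 1> x(3);
  x.setValues({-2.0f, 0.0f, 2.0f});

  // sigmoid(x) = 1 / (1 + exp(-x)), elementwise.
  Eigen::Tensor<float, 1> y = ((-x).exp() + 1.0f).inverse();

  std::cout << y << "\n";  // approx [0.119, 0.5, 0.881]
  return 0;
}
```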
paddle/operators/softmax_op.cc:

```diff
@@ -23,6 +23,8 @@ protected:
       const std::vector<const framework::Tensor *> &inputs,
       const std::vector<framework::Tensor *> &outputs) const override {
     PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
+    PADDLE_ENFORCE(inputs[0]->dims().size() == 2,
+                   "The input of softmax op must be matrix");
     PADDLE_ENFORCE(outputs.size() == 1, "Only one output is need for softmax");
     outputs[0]->set_dims(inputs[0]->dims());
@@ -46,4 +48,5 @@ public:
 namespace ops = paddle::operators;
 REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
-REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<paddle::platform::CPUPlace>);
+REGISTER_OP_CPU_KERNEL(softmax,
+                       ops::SoftmaxKernel<paddle::platform::CPUPlace, float>);
```
paddle/operators/softmax_op.cu:

```diff
@@ -2,4 +2,4 @@
 #include <paddle/operators/softmax_op.h>
 REGISTER_OP_GPU_KERNEL(
-    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace>);
+    softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace, float>);
```
paddle/operators/softmax_op.h:

```diff
@@ -20,11 +20,39 @@
 namespace paddle {
 namespace operators {
-template <typename Place>
+template <typename Place, typename T>
 class SoftmaxKernel : public framework::OpKernel {
 public:
-  void Compute(const framework::KernelContext &context) const override {
-    LOG(INFO) << "Softmax kernel in " << typeid(Place).name();
+  void Compute(const framework::KernelContext& context) const override {
+    auto input = context.Input(0)->Get<framework::Tensor>();
+    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+    auto logits = input.matrix<T>();
+    auto softmax = output->matrix<T>();
+    const int kBatchDim = 0;
+    const int kClassDim = 1;
+    const int batch_size = logits.dimension(kBatchDim);
+    const int num_classes = logits.dimension(kClassDim);
+    Eigen::DSizes<int, 1> along_class(kClassDim);
+    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
+    auto shifted_logits = (logits - logits.maximum(along_class)
+                                        .eval()
+                                        .reshape(batch_by_one)
+                                        .broadcast(one_by_class));
+    softmax.device(*(context.GetEigenDevice<Place>())) = shifted_logits.exp();
+    softmax.device(*(context.GetEigenDevice<Place>())) =
+        (softmax * softmax.sum(along_class)
+                       .inverse()
+                       .eval()
+                       .reshape(batch_by_one)
+                       .broadcast(one_by_class));
   }
 };
 }  // namespace operators
```
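This is the standard numerically stable softmax, done in two device passes: subtracting the per-row maximum before exponentiating leaves the result unchanged (softmax(x) = softmax(x - c) for any per-row constant c) but keeps `exp` from overflowing on large logits, and multiplying by the inverse row sum normalizes each row to 1. A self-contained sketch of the same computation:

```cpp
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2, Eigen::RowMajor> logits(2, 3);
  // The second row would overflow a naive exp(); the max shift keeps it finite.
  logits.setValues({{1, 2, 3}, {1000, 1001, 1002}});

  const Eigen::DenseIndex batch_size = logits.dimension(0);
  const Eigen::DenseIndex num_classes = logits.dimension(1);

  Eigen::DSizes<Eigen::DenseIndex, 1> along_class(1);
  Eigen::DSizes<Eigen::DenseIndex, 2> batch_by_one(batch_size, 1);
  Eigen::DSizes<Eigen::DenseIndex, 2> one_by_class(1, num_classes);

  // Pass 1: shift each row by its maximum, then exponentiate.
  Eigen::Tensor<float, 2, Eigen::RowMajor> e =
      (logits - logits.maximum(along_class)
                    .eval()
                    .reshape(batch_by_one)
                    .broadcast(one_by_class))
          .exp();

  // Pass 2: normalize each row by multiplying with the inverse of its sum.
  Eigen::Tensor<float, 2, Eigen::RowMajor> softmax =
      e * e.sum(along_class)
               .inverse()
               .eval()
               .reshape(batch_by_one)
               .broadcast(one_by_class);

  std::cout << softmax << "\n";  // each row: approx [0.090, 0.245, 0.665]
  return 0;
}
```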