提交 b6c07552 编写于 作者: Q qijun

implement some basic OpKernel

上级 3208914b
......@@ -53,6 +53,5 @@ The equation is: Out = X + Y
} // namespace paddle
REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker);
typedef paddle::operators::AddKernel<::paddle::platform::CPUPlace, float>
AddKernel_CPU_float;
REGISTER_OP_CPU_KERNEL(add_two, AddKernel_CPU_float);
REGISTER_OP_CPU_KERNEL(
add_two, paddle::operators::AddKernel<paddle::platform::CPUPlace, float>);
#include "paddle/operators/add_op.h"
#include "paddle/framework/op_registry.h"
typedef paddle::operators::AddKernel<::paddle::platform::GPUPlace, float> AddKernel_GPU_float;
REGISTER_OP_GPU_KERNEL(add_two,
AddKernel_GPU_float);
\ No newline at end of file
paddle::operators::AddKernel<paddle::platform::GPUPlace, float>);
\ No newline at end of file
......@@ -57,4 +57,4 @@ The equation is: Out = X * Y
REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker);
REGISTER_OP_CPU_KERNEL(
mul, paddle::operators::MulKernel<paddle::platform::CPUPlace>);
mul, paddle::operators::MulKernel<paddle::platform::CPUPlace, float>);
......@@ -17,4 +17,4 @@
REGISTER_OP_GPU_KERNEL(mul,
paddle::operators::MulKernel<paddle::platform
::GPUPlace>);
\ No newline at end of file
::GPUPlace, float>);
\ No newline at end of file
......@@ -20,11 +20,22 @@
namespace paddle {
namespace operators {
template <typename Place>
template <typename Place, typename T>
class MulKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Mul kernel in " << typeid(Place).name();
void Compute(const framework::KernelContext& context) const override {
Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
dim_pair[0].first = 1;
dim_pair[0].second = 0;
auto input0 = context.Input(0)->Get<framework::Tensor>();
auto input1 = context.Input(1)->Get<framework::Tensor>();
auto* output = context.Output(0)->GetMutable<framework::Tensor>();
output->mutable_data<T>(context.GetPlace());
output->matrix<T>().device(*(context.GetEigenDevice<Place>())) =
input0.matrix<T>().contract(input1.matrix<T>(), dim_pair);
}
};
} // namespace operators
......
......@@ -58,4 +58,4 @@ REGISTER_OP(rowwise_add,
paddle::operators::RowWiseAddOpMaker);
REGISTER_OP_CPU_KERNEL(
rowwise_add,
paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace>);
paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace, float>);
......@@ -3,4 +3,4 @@
REGISTER_OP_GPU_KERNEL(
rowwise_add,
paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace>);
paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace, float>);
......@@ -19,11 +19,24 @@
namespace paddle {
namespace operators {
template <typename Place>
template <typename Place, typename T>
class RowWiseAddKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name();
void Compute(const framework::KernelContext& context) const override {
auto in0 = context.Input(0)->Get<framework::Tensor>();
auto in1 = context.Input(1)->Get<framework::Tensor>();
auto* out = context.Output(0)->GetMutable<framework::Tensor>();
auto input = in0.matrix<T>();
auto bias = in1.vec<T>();
auto output = out->matrix<T>();
const int bias_size = bias.dimension(0);
const int rest_size = input.size() / bias_size;
Eigen::DSizes<int, 1> one_d(input.size());
Eigen::DSizes<int, 1> bcast(rest_size);
output.reshape(one_d).device(*(context.GetEigenDevice<Place>())) =
input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d);
}
};
......
......@@ -46,4 +46,5 @@ REGISTER_OP(sigmoid,
paddle::operators::SigmoidOp,
paddle::operators::SigmoidOpMaker);
REGISTER_OP_CPU_KERNEL(
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::CPUPlace>);
sigmoid,
paddle::operators::SigmoidKernel<paddle::platform::CPUPlace, float>);
......@@ -2,4 +2,4 @@
#include <paddle/framework/op_registry.h>
REGISTER_OP_GPU_KERNEL(
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace>);
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace, float>);
......@@ -20,11 +20,17 @@
namespace paddle {
namespace operators {
template <typename Place>
template <typename Place, typename T>
class SigmoidKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name();
void Compute(const framework::KernelContext& context) const override {
auto input = context.Input(0)->Get<framework::Tensor>();
auto* output = context.Output(0)->GetMutable<framework::Tensor>();
output->mutable_data<T>(context.GetPlace());
output->flat<T>().device(*(context.GetEigenDevice<Place>())) =
1.0 / (1.0 + (-1.0 * input.flat<T>()).exp());
}
};
} // namespace operators
......
......@@ -23,6 +23,8 @@ protected:
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
PADDLE_ENFORCE(inputs[0]->dims().size() == 2,
"The input of softmax op must be matrix");
PADDLE_ENFORCE(outputs.size() == 1, "Only one output is need for softmax");
outputs[0]->set_dims(inputs[0]->dims());
......@@ -46,4 +48,5 @@ public:
namespace ops = paddle::operators;
REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<paddle::platform::CPUPlace>);
REGISTER_OP_CPU_KERNEL(softmax,
ops::SoftmaxKernel<paddle::platform::CPUPlace, float>);
......@@ -2,4 +2,4 @@
#include <paddle/operators/softmax_op.h>
REGISTER_OP_GPU_KERNEL(
softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace>);
softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace, float>);
......@@ -20,11 +20,39 @@
namespace paddle {
namespace operators {
template <typename Place>
template <typename Place, typename T>
class SoftmaxKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Softmax kernel in " << typeid(Place).name();
void Compute(const framework::KernelContext& context) const override {
auto input = context.Input(0)->Get<framework::Tensor>();
auto* output = context.Output(0)->GetMutable<framework::Tensor>();
auto logits = input.matrix<T>();
auto softmax = output->matrix<T>();
const int kBatchDim = 0;
const int kClassDim = 1;
const int batch_size = logits.dimension(kBatchDim);
const int num_classes = logits.dimension(kClassDim);
Eigen::DSizes<int, 1> along_class(kClassDim);
Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
Eigen::DSizes<int, 2> one_by_class(1, num_classes);
auto shifted_logits = (logits - logits.maximum(along_class)
.eval()
.reshape(batch_by_one)
.broadcast(one_by_class));
softmax.device(*(context.GetEigenDevice<Place>())) = shifted_logits.exp();
softmax.device(*(context.GetEigenDevice<Place>())) =
(softmax * softmax.sum(along_class)
.inverse()
.eval()
.reshape(batch_by_one)
.broadcast(one_by_class));
}
};
} // namespace operators
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册