Unverified commit b1bb7484, authored by huangjiyi, committed by GitHub

update (#52879)

Parent e93e8a3f
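This commit migrates the kernel registrations of the gru, gru_unit, lrn, lstm, lstm_unit and lstmp operators from the legacy REGISTER_OP_CPU_KERNEL / REGISTER_OP_CUDA_KERNEL macros to PD_REGISTER_STRUCT_KERNEL, and reorders the kernel class template parameters to <typename T, typename DeviceContext> to match what that macro expects. A schematic before/after of the pattern, taken from the gru CPU registration in the diff below (illustrative only, not the full context):

// Before: one registration entry per concrete kernel instantiation.
REGISTER_OP_CPU_KERNEL(gru,
                       ops::GRUCPUKernel<float>,
                       ops::GRUCPUKernel<double>);

// After: backend, layout and dtypes are macro arguments; the kernel class
// is referenced once and templated as <typename T, typename DeviceContext>.
PD_REGISTER_STRUCT_KERNEL(
    gru, CPU, ALL_LAYOUT, ops::GRUCPUKernel, float, double) {}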
......@@ -313,11 +313,10 @@ class GRUGradOp : public framework::OperatorWithKernel {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class GRUCPUKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
using DeviceContext = phi::CPUContext;
using LodTensorPtr = phi::DenseTensor*;
bool is_test = context.Attr<bool>("is_test");
......@@ -585,9 +584,8 @@ REGISTER_OPERATOR(gru,
REGISTER_OPERATOR(gru_grad,
ops::GRUGradOp,
ops::GRUGradOpNoNeedBufferVarInferer);
REGISTER_OP_CPU_KERNEL(gru,
ops::GRUCPUKernel<float>,
ops::GRUCPUKernel<double>);
REGISTER_OP_CPU_KERNEL(gru_grad,
ops::GRUGradKernel<phi::CPUContext, float>,
ops::GRUGradKernel<phi::CPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
gru, CPU, ALL_LAYOUT, ops::GRUCPUKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(
gru_grad, CPU, ALL_LAYOUT, ops::GRUGradKernel, float, double) {}
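The registration change goes together with a change to the kernel class templates: kernels previously declared as template <typename DeviceContext, typename T>, or as template <typename T> with a hard-coded using DeviceContext = phi::CPUContext; alias in the body (as in GRUCPUKernel above), now take the device context as the trailing template parameter. A minimal sketch of the post-migration kernel shape, keeping the names from this diff; the device_context accessor shown in the comment is the usual fluid pattern and is not part of this diff:

template <typename T, typename DeviceContext>
class GRUCPUKernel : public framework::OpKernel<T> {
 public:
  void BatchCompute(const framework::ExecutionContext& context) const {
    // The device context now comes from the template parameter instead of a
    // hard-coded alias, e.g.:
    // auto& dev_ctx = context.template device_context<DeviceContext>();
  }
};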
......@@ -17,7 +17,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class GRUKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
......@@ -133,9 +133,8 @@ class GRUKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(gru,
ops::GRUKernel<phi::GPUContext, float>,
ops::GRUKernel<phi::GPUContext, double>);
REGISTER_OP_CUDA_KERNEL(gru_grad,
ops::GRUGradKernel<phi::GPUContext, float>,
ops::GRUGradKernel<phi::GPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(gru, GPU, ALL_LAYOUT, ops::GRUKernel, float, double) {
}
PD_REGISTER_STRUCT_KERNEL(
gru_grad, GPU, ALL_LAYOUT, ops::GRUGradKernel, float, double) {}
......@@ -36,7 +36,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
row_shuffle(ctx, src, index_lod, dst, indexed_src);
}
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class GRUGradKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
......
......@@ -323,9 +323,7 @@ REGISTER_OPERATOR(gru_unit_grad,
ops::GRUUnitGradOp,
ops::GRUUnitGradOpNoNeedBufferVarInferer);
REGISTER_OP_CPU_KERNEL(gru_unit,
ops::GRUUnitKernel<phi::CPUContext, float>,
ops::GRUUnitKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(gru_unit_grad,
ops::GRUUnitGradKernel<phi::CPUContext, float>,
ops::GRUUnitGradKernel<phi::CPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
gru_unit, CPU, ALL_LAYOUT, ops::GRUUnitKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(
gru_unit_grad, CPU, ALL_LAYOUT, ops::GRUUnitGradKernel, float, double) {}
......@@ -14,9 +14,8 @@ limitations under the License. */
#include "paddle/fluid/operators/gru_unit_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(gru_unit,
ops::GRUUnitKernel<phi::GPUContext, float>,
ops::GRUUnitKernel<phi::GPUContext, double>);
REGISTER_OP_CUDA_KERNEL(gru_unit_grad,
ops::GRUUnitGradKernel<phi::GPUContext, float>,
ops::GRUUnitGradKernel<phi::GPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
gru_unit, GPU, ALL_LAYOUT, ops::GRUUnitKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(
gru_unit_grad, GPU, ALL_LAYOUT, ops::GRUUnitGradKernel, float, double) {}
......@@ -25,7 +25,7 @@ namespace operators {
enum GRUActivationType { identity = 0, sigmoid = 1, tanh = 2, relu = 3 };
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class GRUUnitKernel : public framework::OpKernel<T> {
public:
template <typename Device, typename X, typename Y>
......@@ -153,7 +153,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class GRUUnitGradKernel : public framework::OpKernel<T> {
public:
template <typename Device, typename X, typename Y, typename DX, typename DY>
......
......@@ -400,5 +400,7 @@ REGISTER_OPERATOR(lrn,
ops::LRNGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(lrn_grad, ops::LRNOpGrad);
REGISTER_OP_CPU_KERNEL(lrn, ops::LRNKernel<phi::CPUContext, float>);
REGISTER_OP_CPU_KERNEL(lrn_grad, ops::LRNGradKernel<phi::CPUContext, float>);
PD_REGISTER_STRUCT_KERNEL(lrn, CPU, ALL_LAYOUT, ops::LRNKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
lrn_grad, CPU, ALL_LAYOUT, ops::LRNGradKernel, float) {}
......@@ -274,5 +274,6 @@ template struct LRNGradFunctor<phi::GPUContext, double>;
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(lrn, ops::LRNKernel<phi::GPUContext, float>);
REGISTER_OP_CUDA_KERNEL(lrn_grad, ops::LRNGradKernel<phi::GPUContext, float>);
PD_REGISTER_STRUCT_KERNEL(lrn, GPU, ALL_LAYOUT, ops::LRNKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
lrn_grad, GPU, ALL_LAYOUT, ops::LRNGradKernel, float) {}
......@@ -43,7 +43,7 @@ struct LRNFunctor {
const DataLayout data_layout = DataLayout::kAnyLayout);
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LRNKernel : public framework::OpKernel<T> {
public:
// f(x) = x * ( k + alpha * SUM((x)^2) )^(-beta)
......@@ -136,7 +136,7 @@ struct LRNGradFunctor {
* The upper and lower is the same as forward. The logic of the sum
* is also the same as forward.
*/
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LRNGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -358,9 +358,8 @@ REGISTER_OPERATOR(lstm,
ops::LSTMGradOpMaker<paddle::framework::OpDesc>,
ops::LSTMGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(lstm_grad, ops::LSTMGradOp);
REGISTER_OP_CPU_KERNEL(lstm,
ops::LSTMKernel<phi::CPUContext, float>,
ops::LSTMKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(lstm_grad,
ops::LSTMGradKernel<phi::CPUContext, float>,
ops::LSTMGradKernel<phi::CPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
lstm, CPU, ALL_LAYOUT, ops::LSTMKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(
lstm_grad, CPU, ALL_LAYOUT, ops::LSTMGradKernel, float, double) {}
......@@ -15,9 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/lstm_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(lstm,
ops::LSTMKernel<phi::GPUContext, float>,
ops::LSTMKernel<phi::GPUContext, double>);
REGISTER_OP_CUDA_KERNEL(lstm_grad,
ops::LSTMGradKernel<phi::GPUContext, float>,
ops::LSTMGradKernel<phi::GPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
lstm, GPU, ALL_LAYOUT, ops::LSTMKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(
lstm_grad, GPU, ALL_LAYOUT, ops::LSTMGradKernel, float, double) {}
......@@ -35,7 +35,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
row_shuffle(ctx, src, index_lod, dst, indexed_src);
}
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LSTMKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -197,7 +197,7 @@ class LSTMKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LSTMGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -142,10 +142,8 @@ REGISTER_OPERATOR(lstm_unit,
ops::LstmUnitGradOpMaker<paddle::framework::OpDesc>,
ops::LstmUnitGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(lstm_unit_grad, ops::LstmUnitGradOp);
REGISTER_OP_CPU_KERNEL(lstm_unit,
ops::LstmUnitKernel<paddle::platform::CPUPlace, float>,
ops::LstmUnitKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(
lstm_unit_grad,
ops::LstmUnitGradKernel<paddle::platform::CPUPlace, float>,
ops::LstmUnitGradKernel<paddle::platform::CPUPlace, double>);
PD_REGISTER_STRUCT_KERNEL(
lstm_unit, CPU, ALL_LAYOUT, ops::LstmUnitKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(
lstm_unit_grad, CPU, ALL_LAYOUT, ops::LstmUnitGradKernel, float, double) {}
......@@ -98,7 +98,7 @@ __global__ void LSTMUnitGradientKernel(const int nthreads,
}
}
template <typename T>
template <typename T, typename DeviceContext>
class LstmUnitOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -131,7 +131,7 @@ class LstmUnitOpCUDAKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class LstmUnitGradOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -183,9 +183,11 @@ class LstmUnitGradOpCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(lstm_unit,
ops::LstmUnitOpCUDAKernel<float>,
ops::LstmUnitOpCUDAKernel<double>);
REGISTER_OP_CUDA_KERNEL(lstm_unit_grad,
ops::LstmUnitGradOpCUDAKernel<float>,
ops::LstmUnitGradOpCUDAKernel<double>);
PD_REGISTER_STRUCT_KERNEL(
lstm_unit, GPU, ALL_LAYOUT, ops::LstmUnitOpCUDAKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(lstm_unit_grad,
GPU,
ALL_LAYOUT,
ops::LstmUnitGradOpCUDAKernel,
float,
double) {}
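Kernels that were previously templated only on T, such as LstmUnitOpCUDAKernel and LstmUnitGradOpCUDAKernel above, also gain the DeviceContext parameter so the shared PD_REGISTER_STRUCT_KERNEL macro can instantiate them uniformly. A hedged sketch of that pattern; it is an assumption here that the Compute body itself is unchanged and may not use the new parameter:

template <typename T, typename DeviceContext>
class LstmUnitOpCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // CUDA-specific implementation as before; DeviceContext is only needed
    // so the struct-kernel registration macro can name the class uniformly.
  }
};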
......@@ -33,7 +33,7 @@ inline T tanh(T x) {
return 2. * sigmoid(2. * x) - 1.;
}
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LstmUnitKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -78,7 +78,7 @@ class LstmUnitKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LstmUnitGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -405,9 +405,7 @@ REGISTER_OPERATOR(lstmp,
ops::LSTMPGradMaker<paddle::framework::OpDesc>,
ops::LSTMPGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(lstmp_grad, ops::LSTMPGradOp);
REGISTER_OP_CPU_KERNEL(lstmp,
ops::LSTMPKernel<phi::CPUContext, float>,
ops::LSTMPKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(lstmp_grad,
ops::LSTMPGradKernel<phi::CPUContext, float>,
ops::LSTMPGradKernel<phi::CPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
lstmp, CPU, ALL_LAYOUT, ops::LSTMPKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(
lstmp_grad, CPU, ALL_LAYOUT, ops::LSTMPGradKernel, float, double) {}
......@@ -15,9 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/lstmp_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(lstmp,
ops::LSTMPKernel<phi::GPUContext, float>,
ops::LSTMPKernel<phi::GPUContext, double>);
REGISTER_OP_CUDA_KERNEL(lstmp_grad,
ops::LSTMPGradKernel<phi::GPUContext, float>,
ops::LSTMPGradKernel<phi::GPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
lstmp, GPU, ALL_LAYOUT, ops::LSTMPKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(
lstmp_grad, GPU, ALL_LAYOUT, ops::LSTMPGradKernel, float, double) {}
......@@ -78,7 +78,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
row_shuffle(ctx, src, index, dst, indexed_src);
}
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LSTMPKernel : public framework::OpKernel<T> {
public:
template <typename Device, typename X, typename Y>
......@@ -279,7 +279,7 @@ class LSTMPKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LSTMPGradKernel : public framework::OpKernel<T> {
public:
template <typename Device, typename X, typename Y, typename DX, typename DY>
......