未验证 提交 2f499713 编写于 作者: huangjiyi 提交者: GitHub

update (#52880)

上级 54e4360a
......@@ -79,4 +79,6 @@ class HashOpMaker : public framework::OpProtoAndCheckerMaker {
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(hash, ops::HashOp, ops::HashOpMaker);
REGISTER_OP_CPU_KERNEL(hash, ops::HashKernel<int>, ops::HashKernel<int64_t>);
PD_REGISTER_STRUCT_KERNEL(
hash, CPU, ALL_LAYOUT, ops::HashKernel, int, int64_t) {}
......@@ -38,7 +38,7 @@ inline void HashOutputSize(const framework::DDim& in_dims,
out_dims.emplace_back(1);
}
template <typename T>
template <typename T, typename DeviceContext>
class HashKernel : public framework::OpKernel<T> {
public:
virtual void Compute(const framework::ExecutionContext& context) const {
......
......@@ -150,12 +150,15 @@ REGISTER_OPERATOR(hinge_loss,
ops::HingeLossGradOpMaker<paddle::framework::OpDesc>,
ops::HingeLossGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(hinge_loss_grad, ops::HingeLossGradOp);
REGISTER_OP_CPU_KERNEL(hinge_loss,
ops::HingeLossKernel<phi::CPUContext, float>);
REGISTER_OP_CPU_KERNEL(hinge_loss_grad,
ops::HingeLossGradKernel<phi::CPUContext, float>);
REGISTER_OP_CUDA_KERNEL(hinge_loss,
ops::HingeLossKernel<phi::GPUContext, float>);
REGISTER_OP_CUDA_KERNEL(hinge_loss_grad,
ops::HingeLossGradKernel<phi::GPUContext, float>);
PD_REGISTER_STRUCT_KERNEL(
hinge_loss, CPU, ALL_LAYOUT, ops::HingeLossKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
hinge_loss_grad, CPU, ALL_LAYOUT, ops::HingeLossGradKernel, float) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_STRUCT_KERNEL(
hinge_loss, GPU, ALL_LAYOUT, ops::HingeLossKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
hinge_loss_grad, GPU, ALL_LAYOUT, ops::HingeLossGradKernel, float) {}
#endif
......@@ -20,7 +20,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T, typename AttrType = T>
template <typename T, typename DeviceContext, typename AttrType = T>
class HingeLossKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -38,7 +38,7 @@ class HingeLossKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T, typename AttrType = T>
template <typename T, typename DeviceContext, typename AttrType = T>
class HingeLossGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......
......@@ -195,12 +195,15 @@ REGISTER_OPERATOR(im2sequence,
ops::Im2SequenceGradMaker<paddle::framework::OpDesc>,
ops::Im2SequenceGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(im2sequence_grad, ops::Im2SequenceGradOp);
REGISTER_OP_CPU_KERNEL(im2sequence,
ops::Im2SequenceKernel<phi::CPUContext, float>);
REGISTER_OP_CPU_KERNEL(im2sequence_grad,
ops::Im2SequenceGradKernel<phi::CPUContext, float>);
REGISTER_OP_CUDA_KERNEL(im2sequence,
ops::Im2SequenceKernel<phi::GPUContext, float>);
REGISTER_OP_CUDA_KERNEL(im2sequence_grad,
ops::Im2SequenceGradKernel<phi::GPUContext, float>);
PD_REGISTER_STRUCT_KERNEL(
im2sequence, CPU, ALL_LAYOUT, ops::Im2SequenceKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
im2sequence_grad, CPU, ALL_LAYOUT, ops::Im2SequenceGradKernel, float) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_STRUCT_KERNEL(
im2sequence, GPU, ALL_LAYOUT, ops::Im2SequenceKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
im2sequence_grad, GPU, ALL_LAYOUT, ops::Im2SequenceGradKernel, float) {}
#endif
......@@ -33,7 +33,7 @@ inline int Im2SeqOutputSize(
return output_size;
}
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class Im2SequenceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -153,7 +153,7 @@ class Im2SequenceKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class Im2SequenceGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -210,7 +210,7 @@ class InplaceABNOpGradMaker : public framework::SingleGradOpMaker<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class InplaceABNKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -270,7 +270,7 @@ class InplaceABNKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class InplaceABNGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -373,9 +373,11 @@ REGISTER_OPERATOR(inplace_abn,
InplaceAbnOpInplaceInferer)
REGISTER_OPERATOR(inplace_abn_grad, ops::InplaceABNGradOp)
REGISTER_OP_CPU_KERNEL(inplace_abn,
ops::InplaceABNKernel<phi::CPUContext, float>,
ops::InplaceABNKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(inplace_abn_grad,
ops::InplaceABNGradKernel<phi::CPUContext, float>,
ops::InplaceABNGradKernel<phi::CPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
inplace_abn, CPU, ALL_LAYOUT, ops::InplaceABNKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(inplace_abn_grad,
CPU,
ALL_LAYOUT,
ops::InplaceABNGradKernel,
float,
double) {}
......@@ -23,7 +23,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class InplaceABNKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -109,7 +109,7 @@ class InplaceABNKernel : public framework::OpKernel<T> {
// Deriving the Gradient for the Backward Pass of Batch Normalization
// https://kevinzakka.github.io/2016/09/14/batch_normalization/
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class InplaceABNGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -221,15 +221,17 @@ namespace ops = paddle::operators;
namespace plat = paddle::platform;
#ifdef PADDLE_WITH_HIP
// MIOpen does not support double
REGISTER_OP_CUDA_KERNEL(inplace_abn,
ops::InplaceABNKernel<phi::GPUContext, float>);
REGISTER_OP_CUDA_KERNEL(inplace_abn_grad,
ops::InplaceABNGradKernel<phi::GPUContext, float>);
PD_REGISTER_STRUCT_KERNEL(
inplace_abn, GPU, ALL_LAYOUT, ops::InplaceABNKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
inplace_abn_grad, GPU, ALL_LAYOUT, ops::InplaceABNGradKernel, float) {}
#else
REGISTER_OP_CUDA_KERNEL(inplace_abn,
ops::InplaceABNKernel<phi::GPUContext, float>,
ops::InplaceABNKernel<phi::GPUContext, double>);
REGISTER_OP_CUDA_KERNEL(inplace_abn_grad,
ops::InplaceABNGradKernel<phi::GPUContext, float>,
ops::InplaceABNGradKernel<phi::GPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
inplace_abn, GPU, ALL_LAYOUT, ops::InplaceABNKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(inplace_abn_grad,
GPU,
ALL_LAYOUT,
ops::InplaceABNGradKernel,
float,
double) {}
#endif
......@@ -77,10 +77,13 @@ class LimitByCapacityOpMaker : public framework::OpProtoAndCheckerMaker {
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CPU_KERNEL(limit_by_capacity,
ops::LimitByCapacityOpCPUKernel<int>,
ops::LimitByCapacityOpCPUKernel<int64_t>);
REGISTER_OP_WITHOUT_GRADIENT(limit_by_capacity,
ops::LimitByCapacityOp,
ops::LimitByCapacityOpMaker);
PD_REGISTER_STRUCT_KERNEL(limit_by_capacity,
CPU,
ALL_LAYOUT,
ops::LimitByCapacityOpCPUKernel,
int,
int64_t) {}
......@@ -47,7 +47,7 @@ __global__ void limit_by_capacity_impl(
}
}
template <typename T>
template <typename T, typename DeviceContext>
class LimitByCapacityOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -78,7 +78,8 @@ class LimitByCapacityOpCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(limit_by_capacity,
ops::LimitByCapacityOpCUDAKernel<int64_t>);
PD_REGISTER_STRUCT_KERNEL(limit_by_capacity,
GPU,
ALL_LAYOUT,
ops::LimitByCapacityOpCUDAKernel,
int64_t) {}
......@@ -24,7 +24,7 @@
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename DeviceContext>
class LimitByCapacityOpCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -395,10 +395,16 @@ REGISTER_OPERATOR(linear_chain_crf,
REGISTER_OPERATOR(linear_chain_crf_grad,
ops::LinearChainCRFGradOp,
ops::LinearChainCRFGradNoNeedBufferVarsInferer);
REGISTER_OP_CPU_KERNEL(linear_chain_crf,
ops::LinearChainCRFOpKernel<phi::CPUContext, float>,
ops::LinearChainCRFOpKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(
linear_chain_crf_grad,
ops::LinearChainCRFGradOpKernel<phi::CPUContext, float>,
ops::LinearChainCRFGradOpKernel<phi::CPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(linear_chain_crf,
CPU,
ALL_LAYOUT,
ops::LinearChainCRFOpKernel,
float,
double) {}
PD_REGISTER_STRUCT_KERNEL(linear_chain_crf_grad,
CPU,
ALL_LAYOUT,
ops::LinearChainCRFGradOpKernel,
float,
double) {}
......@@ -48,7 +48,7 @@ struct ScalarMul {
using framework::LoD;
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LinearChainCRFOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -245,7 +245,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -181,7 +181,11 @@ REGISTER_OPERATOR(margin_rank_loss,
ops::MarginRankLossGradMaker<paddle::framework::OpDesc>,
ops::MarginRankLossGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(margin_rank_loss_grad, ops::MarginRankLossGradOp);
REGISTER_OP_CPU_KERNEL(margin_rank_loss,
ops::MarginRankLossKernel<phi::CPUContext, float>);
REGISTER_OP_CPU_KERNEL(margin_rank_loss_grad,
ops::MarginRankLossGradKernel<phi::CPUContext, float>);
PD_REGISTER_STRUCT_KERNEL(
margin_rank_loss, CPU, ALL_LAYOUT, ops::MarginRankLossKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(margin_rank_loss_grad,
CPU,
ALL_LAYOUT,
ops::MarginRankLossGradKernel,
float) {}
......@@ -16,7 +16,10 @@ limitations under the License. */
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(margin_rank_loss,
ops::MarginRankLossKernel<phi::GPUContext, float>);
REGISTER_OP_CUDA_KERNEL(margin_rank_loss_grad,
ops::MarginRankLossGradKernel<phi::GPUContext, float>);
PD_REGISTER_STRUCT_KERNEL(
margin_rank_loss, GPU, ALL_LAYOUT, ops::MarginRankLossKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(margin_rank_loss_grad,
GPU,
ALL_LAYOUT,
ops::MarginRankLossGradKernel,
float) {}
......@@ -34,7 +34,7 @@ struct Heaviside {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class MarginRankLossKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
......@@ -62,7 +62,7 @@ class MarginRankLossKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class MarginRankLossGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
......
......@@ -176,7 +176,11 @@ REGISTER_OPERATOR(
ops::ModifiedHuberLossGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(modified_huber_loss_grad, ops::ModifiedHuberLossGradOp);
REGISTER_OP_CPU_KERNEL(modified_huber_loss,
ops::ModifiedHuberLossKernel<phi::CPUContext, float>);
REGISTER_OP_CPU_KERNEL(modified_huber_loss_grad,
ops::ModifiedHuberLossGradCPUKernel<float>);
PD_REGISTER_STRUCT_KERNEL(
modified_huber_loss, CPU, ALL_LAYOUT, ops::ModifiedHuberLossKernel, float) {
}
PD_REGISTER_STRUCT_KERNEL(modified_huber_loss_grad,
CPU,
ALL_LAYOUT,
ops::ModifiedHuberLossGradCPUKernel,
float) {}
......@@ -39,7 +39,7 @@ struct ModifiedHuberLossBackward {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class ModifiedHuberLossGradGPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -74,7 +74,12 @@ class ModifiedHuberLossGradGPUKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(modified_huber_loss,
ops::ModifiedHuberLossKernel<phi::GPUContext, float>);
REGISTER_OP_CUDA_KERNEL(modified_huber_loss_grad,
ops::ModifiedHuberLossGradGPUKernel<float>);
PD_REGISTER_STRUCT_KERNEL(
modified_huber_loss, GPU, ALL_LAYOUT, ops::ModifiedHuberLossKernel, float) {
}
PD_REGISTER_STRUCT_KERNEL(modified_huber_loss_grad,
GPU,
ALL_LAYOUT,
ops::ModifiedHuberLossGradGPUKernel,
float) {}
......@@ -52,7 +52,7 @@ struct ModifiedHuberLossForward {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class ModifiedHuberLossKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -79,7 +79,7 @@ class ModifiedHuberLossKernel : public framework::OpKernel<T> {
};
// CPU backward kernel
template <typename T>
template <typename T, typename DeviceContext>
class ModifiedHuberLossGradCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册