Unverified commit 2944d3c0, authored by huangjiyi, committed by GitHub

update (#53036)

Parent 93ff8e4c
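This commit continues the migration of fluid operator kernels to struct-kernel registration: the legacy REGISTER_OP_CPU_KERNEL / REGISTER_OP_CUDA_KERNEL calls are replaced by PD_REGISTER_STRUCT_KERNEL, and the kernel class templates are reordered so the data type T comes before the DeviceContext. As a minimal before/after sketch, here is the target_assign CPU registration lifted from the hunks below (it assumes the usual Paddle operator headers and the ops namespace alias are in scope):

// Before: one explicit kernel instantiation per (DeviceContext, T) pair.
REGISTER_OP_CPU_KERNEL(target_assign,
                       ops::TargetAssignKernel<phi::CPUContext, int, float>,
                       ops::TargetAssignKernel<phi::CPUContext, float, float>);

// After: a single struct-kernel registration per backend; the data types are
// listed once, and the kernel template is now <typename T, typename DeviceContext>.
PD_REGISTER_STRUCT_KERNEL(
    target_assign, CPU, ALL_LAYOUT, ops::TargetAssignKernel, int, float) {}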
@@ -186,6 +186,6 @@ REGISTER_OPERATOR(
     ops::TargetAssignOpMaker,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OP_CPU_KERNEL(target_assign,
-                       ops::TargetAssignKernel<phi::CPUContext, int, float>,
-                       ops::TargetAssignKernel<phi::CPUContext, float, float>);
+PD_REGISTER_STRUCT_KERNEL(
+    target_assign, CPU, ALL_LAYOUT, ops::TargetAssignKernel, int, float) {}
@@ -65,6 +65,6 @@ template struct NegTargetAssignFunctor<phi::GPUContext, float, float>;
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(target_assign,
-                        ops::TargetAssignKernel<phi::GPUContext, int, float>,
-                        ops::TargetAssignKernel<phi::GPUContext, float, float>);
+PD_REGISTER_STRUCT_KERNEL(
+    target_assign, GPU, ALL_LAYOUT, ops::TargetAssignKernel, int, float) {}
@@ -92,7 +92,7 @@ struct NegTargetAssignFunctor {
                  WT* out_wt) const;
 };
-template <typename DeviceContext, typename T, typename WT>
+template <typename T, typename DeviceContext, typename WT = float>
 class TargetAssignKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
...
@@ -221,8 +221,12 @@ REGISTER_OPERATOR(
 REGISTER_OPERATOR(squared_l2_distance_grad,
                   ops::SquaredL2DistanceGradOp,
                   ops::SquaredL2DistanceGradOpNoBufferVarsInferer);
-REGISTER_OP_CPU_KERNEL(squared_l2_distance,
-                       ops::SquaredL2DistanceKernel<phi::CPUContext, float>);
-REGISTER_OP_CPU_KERNEL(
-    squared_l2_distance_grad,
-    ops::SquaredL2DistanceGradKernel<phi::CPUContext, float>);
+PD_REGISTER_STRUCT_KERNEL(
+    squared_l2_distance, CPU, ALL_LAYOUT, ops::SquaredL2DistanceKernel, float) {
+}
+PD_REGISTER_STRUCT_KERNEL(squared_l2_distance_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::SquaredL2DistanceGradKernel,
+                          float) {}
@@ -14,8 +14,11 @@ limitations under the License. */
 #include "paddle/fluid/operators/squared_l2_distance_op.h"
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(squared_l2_distance,
-                        ops::SquaredL2DistanceKernel<phi::GPUContext, float>);
-REGISTER_OP_CUDA_KERNEL(
-    squared_l2_distance_grad,
-    ops::SquaredL2DistanceGradKernel<phi::GPUContext, float>);
+PD_REGISTER_STRUCT_KERNEL(
+    squared_l2_distance, GPU, ALL_LAYOUT, ops::SquaredL2DistanceKernel, float) {
+}
+PD_REGISTER_STRUCT_KERNEL(squared_l2_distance_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::SquaredL2DistanceGradKernel,
+                          float) {}
@@ -19,7 +19,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SquaredL2DistanceKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -60,7 +60,7 @@ class SquaredL2DistanceKernel : public framework::OpKernel<T> {
   }
 };
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class SquaredL2DistanceGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
...
@@ -119,9 +119,12 @@ REGISTER_OPERATOR(
     ops::TDMChildOpMaker,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OP_CPU_KERNEL(
-    tdm_child,
-    ops::TDMChildKernel<paddle::platform::CPUPlace, float>,
-    ops::TDMChildKernel<paddle::platform::CPUPlace, double>,
-    ops::TDMChildKernel<paddle::platform::CPUPlace, int>,
-    ops::TDMChildKernel<paddle::platform::CPUPlace, int64_t>);
+PD_REGISTER_STRUCT_KERNEL(tdm_child,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::TDMChildKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t) {}
@@ -105,7 +105,7 @@ void TDMChildInner(const framework::ExecutionContext &context,
   memcpy(leaf_mask_data, &item_mask_vec[0], sizeof(OutT) * output_nums);
 }
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class TDMChildKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
...
@@ -136,9 +136,12 @@ REGISTER_OPERATOR(
     ops::TDMSamplerOpMaker,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OP_CPU_KERNEL(
-    tdm_sampler,
-    ops::TDMSamplerKernel<paddle::platform::CPUPlace, float>,
-    ops::TDMSamplerKernel<paddle::platform::CPUPlace, double>,
-    ops::TDMSamplerKernel<paddle::platform::CPUPlace, int>,
-    ops::TDMSamplerKernel<paddle::platform::CPUPlace, int64_t>);
+PD_REGISTER_STRUCT_KERNEL(tdm_sampler,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::TDMSamplerKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t) {}
@@ -251,7 +251,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context,
   }
 }
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class TDMSamplerKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
...
@@ -249,10 +249,15 @@ REGISTER_OPERATOR(
 REGISTER_OPERATOR(teacher_student_sigmoid_loss_grad,
                   ops::TeacherStudentSigmoidLossGradientOp);
-REGISTER_OP_CPU_KERNEL(teacher_student_sigmoid_loss,
-                       ops::TeacherStudentSigmoidLossOpKernel<float>,
-                       ops::TeacherStudentSigmoidLossOpKernel<double>);
-
-REGISTER_OP_CPU_KERNEL(teacher_student_sigmoid_loss_grad,
-                       ops::TeacherStudentSigmoidLossGradOpKernel<float>,
-                       ops::TeacherStudentSigmoidLossGradOpKernel<double>);
+PD_REGISTER_STRUCT_KERNEL(teacher_student_sigmoid_loss,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::TeacherStudentSigmoidLossOpKernel,
+                          float,
+                          double) {}
+PD_REGISTER_STRUCT_KERNEL(teacher_student_sigmoid_loss_grad,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::TeacherStudentSigmoidLossGradOpKernel,
+                          float,
+                          double) {}
@@ -19,7 +19,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-template <typename T>
+template <typename T, typename DeviceContext>
 class TeacherStudentSigmoidLossOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -63,7 +63,7 @@ class TeacherStudentSigmoidLossOpKernel : public framework::OpKernel<T> {
   }
 };
-template <typename T>
+template <typename T, typename DeviceContext>
 class TeacherStudentSigmoidLossGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
...
@@ -152,7 +152,7 @@ __global__ void KeTemporalShiftBwNHWC(const T* output_grad,
   }
 }
-template <typename T>
+template <typename T, typename DeviceContext>
 class TemporalShiftOpCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -208,7 +208,7 @@ class TemporalShiftOpCUDAKernel : public framework::OpKernel<T> {
   }
 };
-template <typename T>
+template <typename T, typename DeviceContext>
 class TemporalShiftGradOpCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -267,13 +267,19 @@ class TemporalShiftGradOpCUDAKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 namespace ops = paddle::operators;
+namespace plat = paddle::platform;
-REGISTER_OP_CUDA_KERNEL(
-    temporal_shift,
-    ops::TemporalShiftOpCUDAKernel<float>,
-    ops::TemporalShiftOpCUDAKernel<double>,
-    ops::TemporalShiftOpCUDAKernel<paddle::platform::float16>);
-REGISTER_OP_CUDA_KERNEL(
-    temporal_shift_grad,
-    ops::TemporalShiftGradOpCUDAKernel<float>,
-    ops::TemporalShiftGradOpCUDAKernel<double>,
-    ops::TemporalShiftGradOpCUDAKernel<paddle::platform::float16>);
+PD_REGISTER_STRUCT_KERNEL(temporal_shift,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::TemporalShiftOpCUDAKernel,
+                          float,
+                          double,
+                          plat::float16) {}
+PD_REGISTER_STRUCT_KERNEL(temporal_shift_grad,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::TemporalShiftGradOpCUDAKernel,
+                          float,
+                          double,
+                          plat::float16) {}
@@ -234,10 +234,7 @@ REGISTER_OPERATOR(tree_conv,
 REGISTER_OPERATOR(tree_conv_grad, ops::TreeConvGradOp);
-REGISTER_OP_CPU_KERNEL(tree_conv,
-                       ops::TreeConvKernel<phi::CPUContext, float>,
-                       ops::TreeConvKernel<phi::CPUContext, double>);
-
-REGISTER_OP_CPU_KERNEL(tree_conv_grad,
-                       ops::TreeConvGradKernel<phi::CPUContext, float>,
-                       ops::TreeConvGradKernel<phi::CPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(
+    tree_conv, CPU, ALL_LAYOUT, ops::TreeConvKernel, float, double) {}
+PD_REGISTER_STRUCT_KERNEL(
+    tree_conv_grad, CPU, ALL_LAYOUT, ops::TreeConvGradKernel, float, double) {}
@@ -15,9 +15,8 @@
 #include "paddle/fluid/operators/tree_conv_op.h"
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(tree_conv,
-                        ops::TreeConvKernel<phi::GPUContext, float>,
-                        ops::TreeConvKernel<phi::GPUContext, double>);
-REGISTER_OP_CUDA_KERNEL(tree_conv_grad,
-                        ops::TreeConvGradKernel<phi::GPUContext, float>,
-                        ops::TreeConvGradKernel<phi::GPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(
+    tree_conv, GPU, ALL_LAYOUT, ops::TreeConvKernel, float, double) {}
+PD_REGISTER_STRUCT_KERNEL(
+    tree_conv_grad, GPU, ALL_LAYOUT, ops::TreeConvGradKernel, float, double) {}
@@ -23,7 +23,7 @@
 namespace paddle {
 namespace operators {
 using DDim = framework::DDim;
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class TreeConvKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
@@ -73,7 +73,7 @@ class TreeConvKernel : public framework::OpKernel<T> {
     }
   }
 };
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class TreeConvGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
...
@@ -75,8 +75,11 @@ namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(unique_with_counts,
                              ops::UniqueWithCountsOp,
                              ops::UniqueWithCountsOpMaker);
-REGISTER_OP_CPU_KERNEL(unique_with_counts,
-                       ops::UniqueWithCountsKernel<float>,
-                       ops::UniqueWithCountsKernel<double>,
-                       ops::UniqueWithCountsKernel<int32_t>,
-                       ops::UniqueWithCountsKernel<int64_t>);
+PD_REGISTER_STRUCT_KERNEL(unique_with_counts,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::UniqueWithCountsKernel,
+                          float,
+                          double,
+                          int32_t,
+                          int64_t) {}
@@ -25,7 +25,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-template <typename T>
+template <typename T, typename DeviceContext>
 class UniqueWithCountsKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
...
@@ -176,7 +176,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const {
   }
 }
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class CPUVarConv2dOPKernel : public framework::OpKernel<T> {
  public:
   void Im2Col(const framework::ExecutionContext& ctx,
@@ -392,7 +392,7 @@ void VarConv2dOpGrad::InferShape(framework::InferShapeContext* ctx) const {
   }
 }
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class CPUVarConv2dOPGradKernel : public framework::OpKernel<T> {
  public:
   void Im2ColGrad(const framework::ExecutionContext& ctx, T* top_diff) const {
@@ -532,11 +532,7 @@ REGISTER_OPERATOR(var_conv_2d,
     ops::VarConv2dGradMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(var_conv_2d_grad, ops::VarConv2dOpGrad);
-REGISTER_OP_CPU_KERNEL(var_conv_2d,
-                       ops::CPUVarConv2dOPKernel<phi::CPUContext, float>);
-// ops::CPUVarConv2dOPKernel<phi::CPUContext,
-// double>
-REGISTER_OP_CPU_KERNEL(var_conv_2d_grad,
-                       ops::CPUVarConv2dOPGradKernel<phi::CPUContext, float>);
-// ops::CPUVarConv2dOPGradKernel<phi::CPUContext,
-// double>
+PD_REGISTER_STRUCT_KERNEL(
+    var_conv_2d, CPU, ALL_LAYOUT, ops::CPUVarConv2dOPKernel, float) {}
+PD_REGISTER_STRUCT_KERNEL(
+    var_conv_2d_grad, CPU, ALL_LAYOUT, ops::CPUVarConv2dOPGradKernel, float) {}