未验证 提交 2944d3c0 编写于 作者: H huangjiyi 提交者: GitHub

update (#53036)

上级 93ff8e4c
......@@ -186,6 +186,6 @@ REGISTER_OPERATOR(
ops::TargetAssignOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
// Register the CPU target_assign kernel through the phi structured-kernel
// macro. The deprecated REGISTER_OP_CPU_KERNEL form is removed: keeping both
// would register the same op twice with conflicting mechanisms.
PD_REGISTER_STRUCT_KERNEL(
    target_assign, CPU, ALL_LAYOUT, ops::TargetAssignKernel, int, float) {}
......@@ -65,6 +65,6 @@ template struct NegTargetAssignFunctor<phi::GPUContext, float, float>;
} // namespace paddle
namespace ops = paddle::operators;
// Register the GPU target_assign kernel through the phi structured-kernel
// macro. The deprecated REGISTER_OP_CUDA_KERNEL form is removed to avoid a
// duplicate registration of the same kernel.
PD_REGISTER_STRUCT_KERNEL(
    target_assign, GPU, ALL_LAYOUT, ops::TargetAssignKernel, int, float) {}
......@@ -92,7 +92,7 @@ struct NegTargetAssignFunctor {
WT* out_wt) const;
};
// Template-parameter order follows the phi structured-kernel convention:
// data type T first, then DeviceContext; WT defaults to float. The old
// header (DeviceContext first) is removed — two consecutive template
// headers on one class declaration would not compile.
template <typename T, typename DeviceContext, typename WT = float>
class TargetAssignKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -221,8 +221,12 @@ REGISTER_OPERATOR(
REGISTER_OPERATOR(squared_l2_distance_grad,
ops::SquaredL2DistanceGradOp,
ops::SquaredL2DistanceGradOpNoBufferVarsInferer);
// Register the CPU squared_l2_distance forward and gradient kernels through
// the phi structured-kernel macro; the deprecated REGISTER_OP_CPU_KERNEL
// registrations are removed so each op is registered exactly once.
PD_REGISTER_STRUCT_KERNEL(
    squared_l2_distance, CPU, ALL_LAYOUT, ops::SquaredL2DistanceKernel, float) {
}
PD_REGISTER_STRUCT_KERNEL(squared_l2_distance_grad,
                          CPU,
                          ALL_LAYOUT,
                          ops::SquaredL2DistanceGradKernel,
                          float) {}
......@@ -14,8 +14,11 @@ limitations under the License. */
#include "paddle/fluid/operators/squared_l2_distance_op.h"
namespace ops = paddle::operators;
// Register the GPU squared_l2_distance forward and gradient kernels through
// the phi structured-kernel macro; the deprecated REGISTER_OP_CUDA_KERNEL
// registrations are removed so each op is registered exactly once.
PD_REGISTER_STRUCT_KERNEL(
    squared_l2_distance, GPU, ALL_LAYOUT, ops::SquaredL2DistanceKernel, float) {
}
PD_REGISTER_STRUCT_KERNEL(squared_l2_distance_grad,
                          GPU,
                          ALL_LAYOUT,
                          ops::SquaredL2DistanceGradKernel,
                          float) {}
......@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
// phi structured-kernel parameter order: data type T first, then
// DeviceContext. The old header (DeviceContext first) is removed — two
// consecutive template headers would not compile.
template <typename T, typename DeviceContext>
class SquaredL2DistanceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -60,7 +60,7 @@ class SquaredL2DistanceKernel : public framework::OpKernel<T> {
}
};
// phi structured-kernel parameter order: data type T first, then
// DeviceContext. The old header (DeviceContext first) is removed — two
// consecutive template headers would not compile.
template <typename T, typename DeviceContext>
class SquaredL2DistanceGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......
......@@ -119,9 +119,12 @@ REGISTER_OPERATOR(
ops::TDMChildOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
// Register the CPU tdm_child kernel for float/double/int/int64_t through the
// phi structured-kernel macro; the deprecated REGISTER_OP_CPU_KERNEL
// registration is removed to avoid registering the op twice.
PD_REGISTER_STRUCT_KERNEL(tdm_child,
                          CPU,
                          ALL_LAYOUT,
                          ops::TDMChildKernel,
                          float,
                          double,
                          int,
                          int64_t) {}
......@@ -105,7 +105,7 @@ void TDMChildInner(const framework::ExecutionContext &context,
memcpy(leaf_mask_data, &item_mask_vec[0], sizeof(OutT) * output_nums);
}
// phi structured-kernel parameter order: data type T first, then
// DeviceContext. The old header (DeviceContext first) is removed — two
// consecutive template headers would not compile.
template <typename T, typename DeviceContext>
class TDMChildKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......
......@@ -136,9 +136,12 @@ REGISTER_OPERATOR(
ops::TDMSamplerOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
// Register the CPU tdm_sampler kernel for float/double/int/int64_t through
// the phi structured-kernel macro; the deprecated REGISTER_OP_CPU_KERNEL
// registration is removed to avoid registering the op twice.
PD_REGISTER_STRUCT_KERNEL(tdm_sampler,
                          CPU,
                          ALL_LAYOUT,
                          ops::TDMSamplerKernel,
                          float,
                          double,
                          int,
                          int64_t) {}
......@@ -251,7 +251,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context,
}
}
// phi structured-kernel parameter order: data type T first, then
// DeviceContext. The old header (DeviceContext first) is removed — two
// consecutive template headers would not compile.
template <typename T, typename DeviceContext>
class TDMSamplerKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
......
......@@ -249,10 +249,15 @@ REGISTER_OPERATOR(
REGISTER_OPERATOR(teacher_student_sigmoid_loss_grad,
ops::TeacherStudentSigmoidLossGradientOp);
// Register the CPU teacher_student_sigmoid_loss forward and gradient kernels
// (float/double) through the phi structured-kernel macro; the deprecated
// REGISTER_OP_CPU_KERNEL registrations are removed so each op is registered
// exactly once.
PD_REGISTER_STRUCT_KERNEL(teacher_student_sigmoid_loss,
                          CPU,
                          ALL_LAYOUT,
                          ops::TeacherStudentSigmoidLossOpKernel,
                          float,
                          double) {}
PD_REGISTER_STRUCT_KERNEL(teacher_student_sigmoid_loss_grad,
                          CPU,
                          ALL_LAYOUT,
                          ops::TeacherStudentSigmoidLossGradOpKernel,
                          float,
                          double) {}
......@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
// phi structured kernels take a DeviceContext template parameter after the
// data type T; the old single-parameter header is removed — two consecutive
// template headers would not compile.
template <typename T, typename DeviceContext>
class TeacherStudentSigmoidLossOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -63,7 +63,7 @@ class TeacherStudentSigmoidLossOpKernel : public framework::OpKernel<T> {
}
};
// phi structured kernels take a DeviceContext template parameter after the
// data type T; the old single-parameter header is removed — two consecutive
// template headers would not compile.
template <typename T, typename DeviceContext>
class TeacherStudentSigmoidLossGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......
......@@ -152,7 +152,7 @@ __global__ void KeTemporalShiftBwNHWC(const T* output_grad,
}
}
// phi structured kernels take a DeviceContext template parameter after the
// data type T; the old single-parameter header is removed — two consecutive
// template headers would not compile.
template <typename T, typename DeviceContext>
class TemporalShiftOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -208,7 +208,7 @@ class TemporalShiftOpCUDAKernel : public framework::OpKernel<T> {
}
};
// phi structured kernels take a DeviceContext template parameter after the
// data type T; the old single-parameter header is removed — two consecutive
// template headers would not compile.
template <typename T, typename DeviceContext>
class TemporalShiftGradOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -267,13 +267,19 @@ class TemporalShiftGradOpCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
// Register the GPU temporal_shift forward and gradient kernels
// (float/double/float16) through the phi structured-kernel macro; the
// deprecated REGISTER_OP_CUDA_KERNEL registrations are removed so each op is
// registered exactly once.
namespace plat = paddle::platform;
PD_REGISTER_STRUCT_KERNEL(temporal_shift,
                          GPU,
                          ALL_LAYOUT,
                          ops::TemporalShiftOpCUDAKernel,
                          float,
                          double,
                          plat::float16) {}
PD_REGISTER_STRUCT_KERNEL(temporal_shift_grad,
                          GPU,
                          ALL_LAYOUT,
                          ops::TemporalShiftGradOpCUDAKernel,
                          float,
                          double,
                          plat::float16) {}
......@@ -234,10 +234,7 @@ REGISTER_OPERATOR(tree_conv,
REGISTER_OPERATOR(tree_conv_grad, ops::TreeConvGradOp);
// Register the CPU tree_conv forward and gradient kernels (float/double)
// through the phi structured-kernel macro; the deprecated
// REGISTER_OP_CPU_KERNEL registrations are removed so each op is registered
// exactly once.
PD_REGISTER_STRUCT_KERNEL(
    tree_conv, CPU, ALL_LAYOUT, ops::TreeConvKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(
    tree_conv_grad, CPU, ALL_LAYOUT, ops::TreeConvGradKernel, float, double) {}
......@@ -15,9 +15,8 @@
#include "paddle/fluid/operators/tree_conv_op.h"
namespace ops = paddle::operators;
// Register the GPU tree_conv forward and gradient kernels (float/double)
// through the phi structured-kernel macro; the deprecated
// REGISTER_OP_CUDA_KERNEL registrations are removed so each op is registered
// exactly once.
PD_REGISTER_STRUCT_KERNEL(
    tree_conv, GPU, ALL_LAYOUT, ops::TreeConvKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(
    tree_conv_grad, GPU, ALL_LAYOUT, ops::TreeConvGradKernel, float, double) {}
......@@ -23,7 +23,7 @@
namespace paddle {
namespace operators {
using DDim = framework::DDim;
// phi structured-kernel parameter order: data type T first, then
// DeviceContext. The old header (DeviceContext first) is removed — two
// consecutive template headers would not compile.
template <typename T, typename DeviceContext>
class TreeConvKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -73,7 +73,7 @@ class TreeConvKernel : public framework::OpKernel<T> {
}
}
};
// phi structured-kernel parameter order: data type T first, then
// DeviceContext. The old header (DeviceContext first) is removed — two
// consecutive template headers would not compile.
template <typename T, typename DeviceContext>
class TreeConvGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......
......@@ -75,8 +75,11 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(unique_with_counts,
ops::UniqueWithCountsOp,
ops::UniqueWithCountsOpMaker);
// Register the CPU unique_with_counts kernel for float/double/int32/int64
// through the phi structured-kernel macro; the deprecated
// REGISTER_OP_CPU_KERNEL registration is removed to avoid registering the op
// twice.
PD_REGISTER_STRUCT_KERNEL(unique_with_counts,
                          CPU,
                          ALL_LAYOUT,
                          ops::UniqueWithCountsKernel,
                          float,
                          double,
                          int32_t,
                          int64_t) {}
......@@ -25,7 +25,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
// phi structured kernels take a DeviceContext template parameter after the
// data type T; the old single-parameter header is removed — two consecutive
// template headers would not compile.
template <typename T, typename DeviceContext>
class UniqueWithCountsKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......
......@@ -176,7 +176,7 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const {
}
}
// phi structured-kernel parameter order: data type T first, then
// DeviceContext. The old header (DeviceContext first) is removed — two
// consecutive template headers would not compile.
template <typename T, typename DeviceContext>
class CPUVarConv2dOPKernel : public framework::OpKernel<T> {
public:
void Im2Col(const framework::ExecutionContext& ctx,
......@@ -392,7 +392,7 @@ void VarConv2dOpGrad::InferShape(framework::InferShapeContext* ctx) const {
}
}
// phi structured-kernel parameter order: data type T first, then
// DeviceContext. The old header (DeviceContext first) is removed — two
// consecutive template headers would not compile.
template <typename T, typename DeviceContext>
class CPUVarConv2dOPGradKernel : public framework::OpKernel<T> {
public:
void Im2ColGrad(const framework::ExecutionContext& ctx, T* top_diff) const {
......@@ -532,11 +532,7 @@ REGISTER_OPERATOR(var_conv_2d,
ops::VarConv2dGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(var_conv_2d_grad, ops::VarConv2dOpGrad);
// Register the CPU var_conv_2d forward and gradient kernels (float only)
// through the phi structured-kernel macro. The deprecated
// REGISTER_OP_CPU_KERNEL registrations — together with their commented-out
// double instantiations — are removed so each op is registered exactly once.
// NOTE(review): a double kernel was commented out in the old code and is
// intentionally not registered here either.
PD_REGISTER_STRUCT_KERNEL(
    var_conv_2d, CPU, ALL_LAYOUT, ops::CPUVarConv2dOPKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
    var_conv_2d_grad, CPU, ALL_LAYOUT, ops::CPUVarConv2dOPGradKernel, float) {}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册