diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 353db435213c74982d582e5be298ecfb1a810f30..e6f5b15af8cd440a9304235acfe62787c5f1b134 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -617,6 +617,25 @@ void OpDesc::Flush() {
 
 static std::once_flag init_infer_shape_funcs;
 
+/**
+ * NOTE(paddle-dev): Very tricky code here. Maybe we should find a
+ * better way to register the compile-time infershape method gently.
+ *
+ * Normally, we can register a class derived from InferShapeBase, so that
+ * we can set the field of `infer_shape_` inside OpInfo when registering op.
+ *
+ * However, there is another way we can set the field of `infer_shape_` inside
+ * OpInfo: usually, we overload the InferShape method of OperatorWithKernel.
+ * After running the following method InitInferShapeFuncs, `infer_shape_`
+ * would be set to the InferShape method of OperatorWithKernel. That is to
+ * say, we borrow the run-time InferShape method of OperatorWithKernel to be
+ * the compile-time InferShape method.
+ *
+ * However, at compile time, we may not know the inputs, outputs and attrs of
+ * the run-time OperatorWithKernel. So the following code creates a fake
+ * OperatorWithKernel object. That is why the field info_ of OperatorBase
+ * would be null.
+ */
 static void InitInferShapeFuncs() {
   std::call_once(init_infer_shape_funcs, [] {
     auto &map = OpInfoMap::Instance();
@@ -628,11 +647,16 @@ static void InitInferShapeFuncs() {
       PADDLE_ENFORCE(it != info_map.end(), "%s has not been registered",
                      op_type);
       auto &op_info = it->second;
-      auto op = static_cast<OperatorWithKernel *>(op_info.Creator()(
-          "", VariableNameMap{}, VariableNameMap{}, AttributeMap{}));
       if (op_info.infer_shape_) {  // infer_shape has been registered.
         continue;
       }
+
+      auto op = dynamic_cast<OperatorWithKernel *>(op_info.Creator()(
+          "", VariableNameMap{}, VariableNameMap{}, AttributeMap{}));
+
+      PADDLE_ENFORCE_NOT_NULL(
+          op, "InferShapeBase is not registered to Operator %s", op_type);
+
       op_info.infer_shape_ = [op](InferShapeContext *ctx) {
         op->InferShape(ctx);
       };
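Why the cast changed: `Creator()` returns an `OperatorBase *`, and not every registered op derives from `OperatorWithKernel`. A minimal standalone sketch of the failure mode the new code guards against (hypothetical types, not Paddle code):

    #include <cassert>

    struct OperatorBase { virtual ~OperatorBase() = default; };
    struct OperatorWithKernel : OperatorBase { /* has InferShape(...) */ };
    struct ControlFlowOp : OperatorBase {};  // an op without a kernel

    int main() {
      OperatorBase *op = new ControlFlowOp;
      // The old static_cast would "succeed" and hand back a pointer that is
      // invalid to use as an OperatorWithKernel. dynamic_cast returns nullptr
      // instead, which PADDLE_ENFORCE_NOT_NULL turns into a readable error.
      assert(dynamic_cast<OperatorWithKernel *>(op) == nullptr);
      delete op;
      return 0;
    }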
diff --git a/paddle/fluid/op_use_default_grad_op_maker.spec b/paddle/fluid/op_use_default_grad_op_maker.spec
index 712449f6be87a3cfb61f099ad6291875c8ad1292..f0e3d3e86f24cb68f6e9d41f48c9698b43bca13e 100644
--- a/paddle/fluid/op_use_default_grad_op_maker.spec
+++ b/paddle/fluid/op_use_default_grad_op_maker.spec
@@ -8,9 +8,6 @@ conv_shift
 cos
 cos_sim
 dequantize
-elementwise_div
-elementwise_max
-elementwise_min
 elu
 fc
 flatten
@@ -28,8 +25,6 @@ gelu
 gru
 hard_shrink
 hierarchical_sigmoid
-hinge_loss
-huber_loss
 leaky_relu
 log
 logsigmoid
@@ -57,7 +52,6 @@ requantize
 reshape
 rnn_memory_helper
 round
-row_conv
 sequence_softmax
 sin
 softplus
diff --git a/paddle/fluid/operators/batch_size_like.h b/paddle/fluid/operators/batch_size_like.h
index fc15d56891cf7af10a91ca22a09c84fa2e52d465..7e2740f148f1d273310f44ed4a35d413e7201394 100644
--- a/paddle/fluid/operators/batch_size_like.h
+++ b/paddle/fluid/operators/batch_size_like.h
@@ -74,5 +74,8 @@ class BatchSizeLikeOpMaker : public framework::OpProtoAndCheckerMaker {
   virtual void Apply() = 0;
 };
 
+DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(BatchSizeLikeNoNeedBufferVarsInference,
+                                      "Input");
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.cc b/paddle/fluid/operators/elementwise/elementwise_div_op.cc
index 85612ba47448a7b0d712e9314e3980019c96e9c3..530a54b7ca186008bc8ec4b083254e65378ae619 100644
--- a/paddle/fluid/operators/elementwise/elementwise_div_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_div_op.cc
@@ -13,10 +13,47 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
+#include <memory>
+#include <string>
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+
+class ElementwiseDivOpMaker : public ElementwiseOpMaker {
+ protected:
+  std::string GetName() const override { return "Div"; }
+  std::string GetEquation() const override { return "Out = X / Y"; }
+};
+
+class ElementwiseDivGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("elementwise_div_grad");
+    op->SetInput("Y", Input("Y"));
+    op->SetInput("Out", Output("Out"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
-REGISTER_ELEMWISE_OP(elementwise_div, "Div", "Out = X / Y");
+
+REGISTER_OPERATOR(elementwise_div, ops::ElementwiseOp,
+                  ops::ElementwiseDivOpMaker, ops::ElementwiseOpInferVarType,
+                  ops::ElementwiseDivGradOpDescMaker);
+
+REGISTER_OPERATOR(elementwise_div_grad, ops::ElementwiseOpGrad);
 
 REGISTER_OP_CPU_KERNEL(
     elementwise_div,
diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.h b/paddle/fluid/operators/elementwise/elementwise_div_op.h
index 8a07339077aeaa4403ffd1e1e30e0d58a9cc30e7..0f0ad8637301772f073bca305b9196b9c7865daf 100644
--- a/paddle/fluid/operators/elementwise/elementwise_div_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_div_op.h
@@ -47,7 +47,7 @@ struct DivGradDX {
 template <typename T>
 struct DivGradDY {
   HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
-    return -dout * x / (y * y);
+    return -dout * out / y;
   }
 };
 
@@ -58,13 +58,15 @@ class ElementwiseDivGradKernel : public ElemwiseGradKernel<T> {
     ElemwiseGradKernel<T>::Compute(ctx);
     using Tensor = framework::Tensor;
 
-    auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
     auto* out = ctx.Input<Tensor>("Out");
    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
     int axis = ctx.Attr<int>("axis");
+
+    auto* x = dout;  // Fake x, not used
+
     ElemwiseGradCompute<DeviceContext, T, DivGradDX<T>, DivGradDY<T>>(
         ctx, *x, *y, *out, *dout, axis, dx, dy, DivGradDX<T>(), DivGradDY<T>());
   }
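The `DivGradDY` rewrite relies on the identity dY = -dOut * X / Y^2 = -dOut * Out / Y, which holds because Out = X / Y; once the gradient is phrased in terms of Out, the grad op no longer needs X as an input (hence the `x = dout` placeholder above). A standalone numeric check of the identity (not part of the patch):

    #include <cassert>
    #include <cmath>

    int main() {
      const double x = 3.0, y = 2.0, dout = 0.5;
      const double out = x / y;                   // forward result
      const double dy_old = -dout * x / (y * y);  // needs x
      const double dy_new = -dout * out / y;      // needs only out
      assert(std::fabs(dy_old - dy_new) < 1e-12);
      return 0;
    }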
diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op.cc b/paddle/fluid/operators/elementwise/elementwise_max_op.cc
index ea0dcd736e5700fb0f341938ac3e3e3b178f29c1..b7df9c6f845dfc941e3c6acbc986a584e984a1de 100644
--- a/paddle/fluid/operators/elementwise/elementwise_max_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_max_op.cc
@@ -13,9 +13,48 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise/elementwise_max_op.h"
+#include <memory>
+#include <string>
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+
+class ElementwiseMaxOpMaker : public ElementwiseOpMaker {
+ protected:
+  std::string GetName() const override { return "Max"; }
+  std::string GetEquation() const override { return "Out = max(X, Y)"; }
+};
+
+class ElementwiseMaxGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("elementwise_max_grad");
+    op->SetInput("X", Input("X"));
+    op->SetInput("Y", Input("Y"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
-REGISTER_ELEMWISE_OP(elementwise_max, "Max", "Out = max(X, Y)");
+
+REGISTER_OPERATOR(elementwise_max, ops::ElementwiseOp,
+                  ops::ElementwiseMaxOpMaker, ops::ElementwiseOpInferVarType,
+                  ops::ElementwiseMaxGradOpDescMaker);
+
+REGISTER_OPERATOR(elementwise_max_grad, ops::ElementwiseOpGrad);
+
 REGISTER_OP_CPU_KERNEL(
     elementwise_max,
     ops::ElementwiseMaxKernel<paddle::platform::CPUDeviceContext, float>,
diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op.h b/paddle/fluid/operators/elementwise/elementwise_max_op.h
index 3ee0c32e0d5d5df02d5d157416918fb4fb3aca92..abdb1b9671de80d02b9a6a788088f47929fcc6f0 100644
--- a/paddle/fluid/operators/elementwise/elementwise_max_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_max_op.h
@@ -63,10 +63,10 @@ class ElementwiseMaxGradKernel : public ElemwiseGradKernel<T> {
 
     auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
-    auto* out = ctx.Input<Tensor>("Out");
     auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
+    auto* out = dout;  // Fake out, not used
     int axis = ctx.Attr<int>("axis");
     ElemwiseGradCompute<DeviceContext, T, MaxGradDx<T>, MaxGradDy<T>>(
         ctx, *x, *y, *out, *dout, axis, dx, dy, MaxGradDx<T>(), MaxGradDy<T>());
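Aliasing `out = dout` is safe because the max/min gradient functors route `dout` by comparing `x` and `y` and never read the forward result. A simplified sketch of what such functors compute (paraphrased from `MaxGradDx`/`MaxGradDy`, so treat the exact tie-breaking as illustrative):

    // dX receives the upstream gradient wherever x "won" the max;
    // dY receives it everywhere else. The out parameter is unused, so any
    // tensor of the right shape -- such as dout itself -- may stand in for it.
    template <typename T>
    T MaxGradDxSketch(T x, T y, T /*out*/, T dout) {
      return dout * static_cast<T>(x > y);
    }

    template <typename T>
    T MaxGradDySketch(T x, T y, T /*out*/, T dout) {
      return dout * static_cast<T>(x <= y);
    }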
diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.cc b/paddle/fluid/operators/elementwise/elementwise_min_op.cc
index b263b9addd40cfd329d2cc8588c278df2cb008e9..f60c0ed8a0faad384f4eaa631c2758f83bc56414 100644
--- a/paddle/fluid/operators/elementwise/elementwise_min_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_min_op.cc
@@ -13,9 +13,48 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise/elementwise_min_op.h"
+#include <memory>
+#include <string>
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+
+class ElementwiseMinOpMaker : public ElementwiseOpMaker {
+ protected:
+  std::string GetName() const override { return "Min"; }
+  std::string GetEquation() const override { return "Out = min(X, Y)"; }
+};
+
+class ElementwiseMinGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("elementwise_min_grad");
+    op->SetInput("X", Input("X"));
+    op->SetInput("Y", Input("Y"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
-REGISTER_ELEMWISE_OP(elementwise_min, "Min", "Out = min(X, Y)");
+
+REGISTER_OPERATOR(elementwise_min, ops::ElementwiseOp,
+                  ops::ElementwiseMinOpMaker, ops::ElementwiseOpInferVarType,
+                  ops::ElementwiseMinGradOpDescMaker);
+
+REGISTER_OPERATOR(elementwise_min_grad, ops::ElementwiseOpGrad);
+
 REGISTER_OP_CPU_KERNEL(
     elementwise_min,
     ops::ElementwiseMinKernel<paddle::platform::CPUDeviceContext, float>,
diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.h b/paddle/fluid/operators/elementwise/elementwise_min_op.h
index d04e372faaa4e6296e982afe6155cdde2fec4f81..1a49a6013987ae1ec685ec91ca656e4756ba7c32 100644
--- a/paddle/fluid/operators/elementwise/elementwise_min_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_min_op.h
@@ -62,10 +62,10 @@ class ElementwiseMinGradKernel : public ElemwiseGradKernel<T> {
 
     auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
-    auto* out = ctx.Input<Tensor>("Out");
     auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
+    auto* out = dout;  // Fake out, not used
     int axis = ctx.Attr<int>("axis");
     ElemwiseGradCompute<DeviceContext, T, MinGradDx<T>, MinGradDy<T>>(
         ctx, *x, *y, *out, *dout, axis, dx, dy, MinGradDx<T>(), MinGradDy<T>());
diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h
index 6dbb9072495f743a4df1ff05e029a227c2cf618b..95246b38f530ff5f81e1fbb5f1dd22149943c8ff 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op.h
@@ -173,12 +173,12 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
   using Tensor = framework::Tensor;
 
   void InferShape(framework::InferShapeContext *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
+    auto out_grad_name = framework::GradVarName("Out");
     PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
+    PADDLE_ENFORCE(ctx->HasInput(out_grad_name),
                    "Input(Out@GRAD) should not be null");
 
-    auto x_dims = ctx->GetInputDim("X");
+    auto x_dims = ctx->GetInputDim(out_grad_name);
     auto y_dims = ctx->GetInputDim("Y");
 
     PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
@@ -187,8 +187,8 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
     auto x_grad_name = framework::GradVarName("X");
     auto y_grad_name = framework::GradVarName("Y");
     if (ctx->HasOutput(x_grad_name)) {
-      ctx->ShareDim("X", /*->*/ x_grad_name);
-      ctx->ShareLoD("X", /*->*/ x_grad_name);
+      ctx->ShareDim(out_grad_name, /*->*/ x_grad_name);
+      ctx->ShareLoD(out_grad_name, /*->*/ x_grad_name);
     }
     if (ctx->HasOutput(y_grad_name)) {
       ctx->ShareDim("Y", /*->*/ y_grad_name);
diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op.cc
index 453a1b32a0171a2ca88879ab3287e89c4d3c7759..b8921b171cf37be17fb62d270a5c22f9d1806c64 100644
--- a/paddle/fluid/operators/fill_constant_batch_size_like_op.cc
+++ b/paddle/fluid/operators/fill_constant_batch_size_like_op.cc
@@ -46,6 +46,7 @@ obtained from the `input` tensor.
 )DOC");
   }
 };
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -53,7 +54,8 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(fill_constant_batch_size_like,
                   ops::FillConstantBatchSizeLikeOp,
                   paddle::framework::EmptyGradOpMaker,
-                  ops::FillConstantBatchSizeLikeOpMaker);
+                  ops::FillConstantBatchSizeLikeOpMaker,
+                  ops::BatchSizeLikeNoNeedBufferVarsInference);
 REGISTER_OP_CPU_KERNEL(
     fill_constant_batch_size_like,
     ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUDeviceContext,
diff --git a/paddle/fluid/operators/fill_zeros_like_op.cc b/paddle/fluid/operators/fill_zeros_like_op.cc
--- a/paddle/fluid/operators/fill_zeros_like_op.cc
+++ b/paddle/fluid/operators/fill_zeros_like_op.cc
@@ ... @@ namespace operators {
+class FillZerosLikeOp2 : public FillZerosLikeOp {
+ public:
+  using FillZerosLikeOp::FillZerosLikeOp;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(
+        framework::proto::VarType::Type(ctx.Attr<int>("dtype")),
+        ctx.GetPlace());
+  }
+};
+
+class FillZerosLikeOp2Maker : public FillZerosLikeOpMaker {
+ protected:
+  void ExtraMake() override {
+    this->AddAttr<int>("dtype",
+                       "(int, default 5(FP32)) "
+                       "Output data type.")
+        .SetDefault(framework::proto::VarType::FP32);
+  }
+};
+
+DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(FillZerosLikeOp2NoNeedBufferVarsInference,
+                                      "X");
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, ops::FillZerosLikeOp,
                              ops::FillZerosLikeOpMaker);
+
+REGISTER_OPERATOR(fill_zeros_like2, ops::FillZerosLikeOp2,
+                  ops::FillZerosLikeOp2Maker,
+                  ops::FillZerosLikeOp2NoNeedBufferVarsInference,
+                  paddle::framework::EmptyGradOpMaker);
+
 REGISTER_OP_CPU_KERNEL(
     fill_zeros_like,
     ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, float>,
@@ -58,3 +95,11 @@ REGISTER_OP_CPU_KERNEL(
     ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, int>,
     ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, int64_t>,
     ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, bool>);
+
+REGISTER_OP_CPU_KERNEL(
+    fill_zeros_like2,
+    ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, int64_t>,
+    ops::FillZerosLikeKernel<paddle::platform::CPUDeviceContext, bool>);
diff --git a/paddle/fluid/operators/fill_zeros_like_op.cu.cc b/paddle/fluid/operators/fill_zeros_like_op.cu.cc
index e80a703c30c0335124c089ea82ba4f6fe055acde..1831635def79b3ccb713dbc14cc70b8beeb609fc 100644
--- a/paddle/fluid/operators/fill_zeros_like_op.cu.cc
+++ b/paddle/fluid/operators/fill_zeros_like_op.cu.cc
@@ -26,3 +26,13 @@ REGISTER_OP_CUDA_KERNEL(
     ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext,
                              paddle::platform::float16>,
     ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, bool>);
+
+REGISTER_OP_CUDA_KERNEL(
+    fill_zeros_like2,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext,
+                             paddle::platform::float16>,
+    ops::FillZerosLikeKernel<paddle::platform::CUDADeviceContext, bool>);
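`fill_zeros_like2` differs from `fill_zeros_like` only in taking the output dtype as an explicit attribute, so the kernel can be chosen without inspecting X's data; only X's shape is read, which is what allows "X" to be declared a no-need-buffer variable whose memory may already have been freed by eager deletion. A hypothetical standalone analogue of that contract (not the real kernel):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Hypothetical analogue: all that fill_zeros_like2 needs from its input
    // is the element count (i.e. the shape); the element type is requested
    // via an attribute. The input buffer itself is never dereferenced.
    std::vector<float> ZerosLike2(int64_t numel_of_x /* from X's shape only */) {
      return std::vector<float>(static_cast<std::size_t>(numel_of_x), 0.0f);
    }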
diff --git a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
index 98ebe1fdf4bb3308b2f07a073072031e79e14146..01302687a421165e908b2aa0646ba8b9c835034e 100644
--- a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
+++ b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
@@ -65,17 +65,13 @@ by input arguments.
   }
 };
 
-DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(
-    GaussianRandomBatchSizeLikeNoNeedBufferVarsInference, "Input");
-
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OPERATOR(
-    gaussian_random_batch_size_like,
-    paddle::operators::GaussianRandomBatchSizeLikeOp,
-    paddle::operators::GaussianRandomBatchSizeLikeOpMaker,
-    paddle::framework::EmptyGradOpMaker,
-    paddle::operators::GaussianRandomBatchSizeLikeNoNeedBufferVarsInference);
+REGISTER_OPERATOR(gaussian_random_batch_size_like,
+                  paddle::operators::GaussianRandomBatchSizeLikeOp,
+                  paddle::operators::GaussianRandomBatchSizeLikeOpMaker,
+                  paddle::framework::EmptyGradOpMaker,
+                  paddle::operators::BatchSizeLikeNoNeedBufferVarsInference);
 
 // Kernels are registered in gaussian_random_op.cc and gaussian_random_op.cu
diff --git a/paddle/fluid/operators/hinge_loss_op.cc b/paddle/fluid/operators/hinge_loss_op.cc
index f458ce6c83bfcfb56d558409b0802f27f13a4761..b6cfa9cc43c312e60a1b7c5e13d1ecbe6bc5dc7d 100644
--- a/paddle/fluid/operators/hinge_loss_op.cc
+++ b/paddle/fluid/operators/hinge_loss_op.cc
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/hinge_loss_op.h"
+#include <memory>
+#include <string>
+#include <vector>
 
 namespace paddle {
 namespace operators {
@@ -97,12 +100,29 @@ class HingeLossGradOp : public framework::OperatorWithKernel {
   }
 };
 
+class HingeLossGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("hinge_loss_grad");
+    op->SetInput("Logits", Input("Logits"));
+    op->SetInput("Labels", Input("Labels"));
+    op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss"));
+    op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(hinge_loss, ops::HingeLossOp, ops::HingeLossOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::HingeLossGradOpDescMaker);
 REGISTER_OPERATOR(hinge_loss_grad, ops::HingeLossGradOp);
 REGISTER_OP_CPU_KERNEL(
     hinge_loss,
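The custom maker can drop the forward output "Loss" from the grad op because the hinge gradient is recomputable from the logit and the label alone. A sketch of that gradient, assuming the conventional formulation loss = max(0, 1 - label * logit) with labels in {-1, +1} (the exact label encoding used by the op is not restated in this patch):

    // d(loss)/d(logit) = -label where the margin is violated, 0 elsewhere;
    // the forward loss value never appears in the expression.
    double HingeLossGradLogit(double logit, double label, double dloss) {
      return (1.0 - label * logit > 0.0) ? -label * dloss : 0.0;
    }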
diff --git a/paddle/fluid/operators/huber_loss_op.cc b/paddle/fluid/operators/huber_loss_op.cc
index 253b65a5f33308fc2c94537641b0fa19378b0cc9..a72db384c1f09f66ecf7ce85271d6263bbdcb523 100644
--- a/paddle/fluid/operators/huber_loss_op.cc
+++ b/paddle/fluid/operators/huber_loss_op.cc
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/huber_loss_op.h"
+#include <memory>
+#include <string>
+#include <vector>
 
 namespace paddle {
 namespace operators {
@@ -90,38 +93,45 @@ class HuberLossGradOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Residual"),
-                   "Input(Residual) should not be null.");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                    "Input(Out@GRAD) should not be null.");
 
-    auto x_dims = ctx->GetInputDim("X");
-    auto y_dims = ctx->GetInputDim("Y");
     auto residual_dims = ctx->GetInputDim("Residual");
-    auto out_grad_dims = ctx->GetInputDim(framework::GradVarName("Out"));
-
-    PADDLE_ENFORCE_EQ(residual_dims, x_dims);
-    PADDLE_ENFORCE_EQ(out_grad_dims, x_dims);
 
     auto x_grad_name = framework::GradVarName("X");
     auto y_grad_name = framework::GradVarName("Y");
     if (ctx->HasOutput(x_grad_name)) {
-      ctx->SetOutputDim(x_grad_name, x_dims);
+      ctx->SetOutputDim(x_grad_name, residual_dims);
     }
     if (ctx->HasOutput(y_grad_name)) {
-      ctx->SetOutputDim(y_grad_name, y_dims);
+      ctx->SetOutputDim(y_grad_name, residual_dims);
     }
   }
 };
 
+class HuberLossGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("huber_loss_grad");
+    op->SetInput("Residual", Output("Residual"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    op->SetAttrMap(Attrs());
+    return op;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(huber_loss, ops::HuberLossOp, ops::HuberLossOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::HuberLossGradOpDescMaker);
 REGISTER_OPERATOR(huber_loss_grad, ops::HuberLossGradOp);
 REGISTER_OP_CPU_KERNEL(
     huber_loss, ops::HuberLossKernel<paddle::platform::CPUDeviceContext, float>,
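Here the grad op keeps only the saved "Residual" (plus Out@GRAD): the Huber gradient depends on the residual r and the threshold delta alone, and since the residual has the same shape as X and Y it can also supply the grad output dims, which is what the `SetOutputDim(..., residual_dims)` change above exploits. A sketch of the gradient with respect to the residual, assuming the usual Huber definition:

    #include <cmath>

    // loss(r) = 0.5 * r^2                     for |r| <= delta
    //         = delta * (|r| - 0.5 * delta)   otherwise
    // so d(loss)/dr = r in the quadratic zone, delta * sign(r) outside it.
    double HuberGradResidual(double r, double delta) {
      return std::fabs(r) <= delta ? r : (r > 0.0 ? delta : -delta);
    }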
diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc
index d283bddbe9f974ac6835ee91d5a7851453687b80..81aabdd0061b3940f23d4731d55fc5cbe5817004 100644
--- a/paddle/fluid/operators/row_conv_op.cc
+++ b/paddle/fluid/operators/row_conv_op.cc
@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/row_conv_op.h"
+#include <memory>
+#include <string>
+#include <vector>
+
 #include "paddle/fluid/framework/eigen.h"
 
 namespace paddle {
@@ -54,7 +58,6 @@ class RowConvGradOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("Filter"),
                    "Input(Filter) should not be null.");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
@@ -62,8 +65,8 @@ class RowConvGradOp : public framework::OperatorWithKernel {
 
     auto x_grad_name = framework::GradVarName("X");
     if (ctx->HasOutput(x_grad_name)) {
-      auto x_dims = ctx->GetInputDim("X");
-      ctx->SetOutputDim(x_grad_name, x_dims);
+      auto dout_dims = ctx->GetInputDim(framework::GradVarName("Out"));
+      ctx->SetOutputDim(x_grad_name, dout_dims);
     }
 
     auto filter_grad_name = framework::GradVarName("Filter");
@@ -259,12 +262,31 @@ class RowConvGradKernel
     }
   }
 };
+
+class RowConvGradOpDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("row_conv_grad");
+    op->SetAttrMap(Attrs());
+    op->SetInput("X", Input("X"));
+    op->SetInput("Filter", Input("Filter"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Filter"), InputGrad("Filter"));
+    return op;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(row_conv, ops::RowConvOp, ops::RowConvOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::RowConvGradOpDescMaker);
 REGISTER_OPERATOR(row_conv_grad, ops::RowConvGradOp);
 REGISTER_OP_CPU_KERNEL(
     row_conv, ops::RowConvKernel<paddle::platform::CPUDeviceContext, float>);
diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
index 75d6181749e4e9bd81a3c02de69caf0acd81eef9..7260fe25d6ebb357040af8774c574b767bfd9f13 100644
--- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
+++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
@@ -64,8 +64,9 @@ with random values sampled from a uniform distribution.
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OP_WITHOUT_GRADIENT(
-    uniform_random_batch_size_like,
-    paddle::operators::UniformRandomBatchSizeLikeOp,
-    paddle::operators::UniformRandomBatchSizeLikeOpMaker);
+REGISTER_OPERATOR(uniform_random_batch_size_like,
+                  paddle::operators::UniformRandomBatchSizeLikeOp,
+                  paddle::operators::UniformRandomBatchSizeLikeOpMaker,
+                  paddle::framework::EmptyGradOpMaker,
+                  paddle::operators::BatchSizeLikeNoNeedBufferVarsInference);
 // Kernels are registered in uniform_random_op.cc and uniform_random_op.cu
diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index 6303be003a701e57a8aa1e2f925459f416cdb543..9fd53a74bf51929f9e115fdc94f2f85f8e2fbdda 100644
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -231,9 +231,16 @@ def _remove_no_grad_branch_(op_descs, no_grad_set):
     for idx, op_desc in enumerate(op_descs):
         for arg in op_desc.input_arg_names():
             if core.grad_var_suffix() in arg and arg in no_grad_set:
-                to_insert.append((_create_op_desc_("fill_zeros_like", {
-                    "X": [_strip_grad_suffix_(arg)]
-                }, {"Out": [arg]}, {}), idx))
+                x_in = _strip_grad_suffix_(arg)
+                x_in_var_desc = op_desc.block().find_var_recursive(
+                    cpt.to_bytes(x_in))
+                assert x_in_var_desc is not None, "Variable {} not found".format(
+                    x_in)
+                dtype = x_in_var_desc.dtype()
+
+                to_insert.append(
+                    (_create_op_desc_("fill_zeros_like2", {"X": [x_in]},
+                                      {"Out": [arg]}, {"dtype": dtype}), idx))
 
     list([op_descs.insert(p[1], p[0]) for p in reversed(to_insert)])
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py
index 4844d930daca75595376b1f1f67ae03011a713c6..a84ff1fd6d46c30ad7aa72f1b29a8ae668b90e20 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py
@@ -23,6 +23,8 @@ from test_elementwise_sub_op import *
 from test_concat_op import *
 from test_gather_op import *
 from test_gaussian_random_batch_size_like_op import *
+from test_uniform_random_batch_size_like_op import *
+from test_fill_constant_batch_size_like_op import *
 from test_lod_reset_op import *
 from test_scatter_op import *
 from test_mean_op import *
@@ -40,6 +42,7 @@ from test_sequence_unpad_op import *
 from test_sequence_scatter_op import *
 from test_sequence_slice_op import *
 from test_pad2d_op import *
+from test_fill_zeros_like2_op import *
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..935653b07a6a4e1d344e8040fa4a0ed72b9b164d
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from paddle.fluid.framework import convert_np_dtype_to_dtype_
+from op_test import OpTest
+
+
+class TestFillZerosLike2Op(OpTest):
+    def setUp(self):
+        self.op_type = "fill_zeros_like2"
+        self.dtype = np.float32
+        self.init_dtype()
+        self.inputs = {'X': np.random.random((219, 232)).astype(self.dtype)}
+        self.outputs = {'Out': np.zeros_like(self.inputs["X"])}
+        self.attrs = {'dtype': convert_np_dtype_to_dtype_(self.dtype)}
+
+    def init_dtype(self):
+        pass
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestFillZerosLike2OpFp16(TestFillZerosLike2Op):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+class TestFillZerosLike2OpFp64(TestFillZerosLike2Op):
+    def init_dtype(self):
+        self.dtype = np.float64
+
+
+if __name__ == "__main__":
+    unittest.main()