From 0bfa1f7c7a07f9e7f095a506451cd2efe08212b8 Mon Sep 17 00:00:00 2001 From: xuwei06 Date: Fri, 1 Dec 2017 10:01:04 -0800 Subject: [PATCH] Enforce drop_empty_grad=false When the input of an op is duplicable. For input argument with a list of variables, drop_empty_grad is not allowed because it makes the correspondence bewteen a variable and its gradient ambiguous. Use REGISTER_OP_EX to register the op or call InputGrad(?,false) in GradOpDescMaker. --- paddle/framework/grad_op_desc_maker.h | 18 ++++++++++ paddle/framework/op_desc.h | 2 ++ paddle/framework/op_registry.h | 43 +++++++++++++++++------- paddle/operators/concat_op.cc | 4 +-- paddle/operators/conditional_block_op.cc | 5 +-- paddle/operators/recurrent_op.cc | 2 +- paddle/operators/sequence_concat_op.cc | 13 +++---- paddle/operators/sum_op.cc | 6 ++-- 8 files changed, 66 insertions(+), 27 deletions(-) diff --git a/paddle/framework/grad_op_desc_maker.h b/paddle/framework/grad_op_desc_maker.h index 8c47c0b0c8..cf411fa710 100644 --- a/paddle/framework/grad_op_desc_maker.h +++ b/paddle/framework/grad_op_desc_maker.h @@ -22,6 +22,14 @@ namespace paddle { namespace framework { +/* + This functor class is responsible for creating the gradient ops for the given + operator fwd_op. After it is called (through operator()), the pairs of + (gradient variable, corresponding input variable of fwd_op) will be added to + grad_to_var. If an input variable of fwd_op is contained in no_grad_set, its + gradient varialbe will be ignored or kEmptyVarName depending on the template + argument DropEmptyIG in the derived classes. + */ class GradOpDescMakerBase { public: explicit GradOpDescMakerBase( @@ -56,6 +64,16 @@ class GradOpDescMakerBase { if (!drop_empty_grad) { return ret_val; } + PADDLE_ENFORCE_LE(var_names.size(), 1UL, + "BUG from operator developer:" + " for input argument with a list of variables, " + " drop_empty_grad is not allowed because it makes" + " the correspondence bewteen a variable and its gradient" + " ambiguous. Use REGISTER_OP_EX to register the op" + " or call InputGrad(?,false) in GradOpDescMaker." + " Op type %s", + fwd_op_.Type()); + std::vector dropped_ret_val; dropped_ret_val.reserve(ret_val.size()); std::copy_if(ret_val.begin(), ret_val.end(), diff --git a/paddle/framework/op_desc.h b/paddle/framework/op_desc.h index 18fa02940d..93d4a88f3c 100644 --- a/paddle/framework/op_desc.h +++ b/paddle/framework/op_desc.h @@ -127,7 +127,9 @@ class OpDesc { } proto::OpDesc desc_; + // input arg name => output variable names VariableNameMap inputs_; + // output arg name => output variable names VariableNameMap outputs_; AttributeMap attrs_; diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 278550d496..7f0155b61f 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -126,6 +126,14 @@ class OpKernelRegistrar : public Registrar { __test_global_namespace_##uniq_name##__>::value, \ msg) +/* + The variadic arguments should be class types derived from one of the + following classes: + OpProtoAndCheckerMaker + GradOpDescMakerBase + VarTypeInference + InferShapeBase +*/ #define REGISTER_OPERATOR(op_type, op_class, ...) \ STATIC_ASSERT_GLOBAL_NAMESPACE( \ __reg_op__##op_type, \ @@ -144,20 +152,29 @@ class OpKernelRegistrar : public Registrar { } /** - * Macro to register Operator. + * Macro to register Operator. When the input is duplicable, you should + * use REGISTER_OP_EX with deop_empty_grad=false instead. */ -#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ - grad_op_class) \ - REGISTER_OPERATOR(grad_op_type, grad_op_class); \ - class _GradOpDescMaker_##grad_op_type##_ \ - : public ::paddle::framework::DefaultGradOpDescMaker { \ - using ::paddle::framework::DefaultGradOpDescMaker< \ - true>::DefaultGradOpDescMaker; \ - \ - protected: \ - virtual std::string GradOpType() const { return #grad_op_type; } \ - }; \ - REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ +#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ + grad_op_class) \ + REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type, \ + grad_op_class, true) + +// When an argument is duplicable, we need to use this version. +// Perhaps we can omit DropEmptyIG template parameter and +// only have one version of REGISTER_OP. +#define REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type, \ + grad_op_class, drop_empty_grad) \ + REGISTER_OPERATOR(grad_op_type, grad_op_class); \ + class _GradOpDescMaker_##grad_op_type##_ \ + : public ::paddle::framework::DefaultGradOpDescMaker { \ + using ::paddle::framework::DefaultGradOpDescMaker< \ + drop_empty_grad>::DefaultGradOpDescMaker; \ + \ + protected: \ + virtual std::string GradOpType() const { return #grad_op_type; } \ + }; \ + REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ op_maker_class); #define REGISTER_OP_WITH_KERNEL(op_type, ...) \ diff --git a/paddle/operators/concat_op.cc b/paddle/operators/concat_op.cc index 6151e2e73f..32b61edfd0 100644 --- a/paddle/operators/concat_op.cc +++ b/paddle/operators/concat_op.cc @@ -98,8 +98,8 @@ class ConcatOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(concat, ops::ConcatOp, ops::ConcatOpMaker, concat_grad, - ops::ConcatOpGrad) +REGISTER_OP_EX(concat, ops::ConcatOp, ops::ConcatOpMaker, concat_grad, + ops::ConcatOpGrad, false) REGISTER_OP_CPU_KERNEL(concat, ops::ConcatKernel) REGISTER_OP_CPU_KERNEL(concat_grad, diff --git a/paddle/operators/conditional_block_op.cc b/paddle/operators/conditional_block_op.cc index 00048a10ca..204be7d1e5 100644 --- a/paddle/operators/conditional_block_op.cc +++ b/paddle/operators/conditional_block_op.cc @@ -178,8 +178,9 @@ class ConditionalBlockGradMaker : public framework::SingleGradOpDescMaker { grad_op->SetInput("Out", Output("Out")); grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); grad_op->SetInput("Scope", Output("Scope")); - grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X")); - grad_op->SetOutput(framework::GradVarName("Params"), InputGrad("Params")); + grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X", false)); + grad_op->SetOutput(framework::GradVarName("Params"), + InputGrad("Params", false)); grad_op->SetBlockAttr("sub_block", *this->grad_block_[0]); return std::unique_ptr(grad_op); } diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 4273c12354..5981d5745d 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -570,7 +570,7 @@ class RecurrentGradOpDescMaker : public framework::SingleGradOpDescMaker { for (auto &input_param : this->InputNames()) { grad->SetInput(input_param, this->Input(input_param)); grad->SetOutput(framework::GradVarName(input_param), - this->InputGrad(input_param)); + this->InputGrad(input_param, false)); } for (auto &output_param : this->OutputNames()) { diff --git a/paddle/operators/sequence_concat_op.cc b/paddle/operators/sequence_concat_op.cc index 54e8989f25..2f0aad2003 100644 --- a/paddle/operators/sequence_concat_op.cc +++ b/paddle/operators/sequence_concat_op.cc @@ -67,12 +67,12 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker { "The level should be less than the level number of inputs.") .SetDefault(0); AddComment(R"DOC( -The sequence_concat operator concatenates multiple LoDTensors. -It only supports sequence (LoD Tensor with level number is 1) +The sequence_concat operator concatenates multiple LoDTensors. +It only supports sequence (LoD Tensor with level number is 1) or a nested sequence (LoD tensor with level number is 2) as its input. - Case1: If the axis is other than 0(here, axis is 1 and level is 1), - each input should have the same LoD information and the LoD + each input should have the same LoD information and the LoD information of the output keeps the same as the input. LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4) @@ -80,7 +80,7 @@ or a nested sequence (LoD tensor with level number is 2) as its input. LoD(Out) = {{0,2,4}, {0,1,2,3,4}}; Dims(Out) = (4,7,4) - Case2: - If the axis is 0(here, leve is 0), the inputs are concatenated along + If the axis is 0(here, leve is 0), the inputs are concatenated along time steps, the LoD information of the output need to re-compute. The LoD information of level-1 should be same. @@ -124,8 +124,9 @@ class SequenceConcatGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(sequence_concat, ops::SequenceConcatOp, ops::SequenceConcatOpMaker, - sequence_concat_grad, ops::SequenceConcatGradOp); +REGISTER_OP_EX(sequence_concat, ops::SequenceConcatOp, + ops::SequenceConcatOpMaker, sequence_concat_grad, + ops::SequenceConcatGradOp, false); REGISTER_OP_CPU_KERNEL( sequence_concat, ops::SequenceConcatOpKernel); diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index 36fb5bd29d..891839bf9c 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -106,8 +106,8 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( Sum operator. -This operators sums the input tensors. All the inputs can carry the -LoD (Level of Details) information. However, the output only shares +This operators sums the input tensors. All the inputs can carry the +LoD (Level of Details) information. However, the output only shares the LoD information with the first input. )DOC"); } @@ -170,7 +170,7 @@ class SumGradMaker : public framework::GradOpDescMakerBase { using framework::GradOpDescMakerBase::GradOpDescMakerBase; std::vector> operator()() const override { - auto x_grads = InputGrad("X"); + auto x_grads = InputGrad("X", false); std::vector> grad_ops; grad_ops.reserve(x_grads.size()); auto og = OutputGrad("Out"); -- GitLab