diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
index 6b0c0a6b9fb29e641449f0c21109611cccd4e5a9..35d23d68c0dd26a05544a72316d5764129aa8d40 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
@@ -18,6 +18,7 @@
 #include "paddle/fluid/framework/details/reduce_op_handle.h"
 #include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h"
 #include "paddle/fluid/framework/details/send_op_handle.h"
+#include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/scope.h"
 
 #ifdef PADDLE_WITH_CUDA
@@ -159,25 +160,39 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
     if (!is_forwarding && places_.size() > 1) {
       // Currently, we assume that once gradient is generated, it can be
       // broadcast, and each gradient is only broadcast once.
-      for (auto &og : op->OutputArgumentNames()) {
-        if (IsParameterGradientOnce(og, &og_has_been_broadcast)) {
-          switch (strategy_.reduce_) {
-            case BuildStrategy::ReduceStrategy::kReduce:
-              CreateReduceOp(&result, og, cur_device_id);
-              var_name_on_devices[cur_device_id].emplace(og);
-              bcast_var_name_set[cur_device_id].emplace(
-                  og.substr(0, og.size() - strlen(kGradVarSuffix)));
-              cur_device_id = (cur_device_id + 1) % places_.size();
-              break;
-            case BuildStrategy::ReduceStrategy::kAllReduce:
-              if (IsSparseGradient(var_types, og)) {
-                CreateReduceOp(&result, og, 0);
-                CreateBroadcastOp(&result, og, 0);
-              } else {
-                InsertNCCLAllReduceOp(&result, og);
-              }
-              break;
+      if (static_cast<bool>(boost::get<int>(op->GetAttr(
+                                OpProtoAndCheckerMaker::OpRoleAttrName())) &
+                            static_cast<int>(OpRole::kBackward))) {
+        try {
+          auto backward_vars =
+              boost::get<std::vector<std::string>>(op->GetNullableAttr(
+                  OpProtoAndCheckerMaker::OpRoleVarAttrName()));
+
+          PADDLE_ENFORCE_EQ(backward_vars.size() % 2, 0);
+
+          for (size_t i = 0; i < backward_vars.size(); i += 2) {
+            auto &p_name = backward_vars[i];
+            auto &g_name = backward_vars[i + 1];
+            VLOG(10) << "Bcast " << g_name << " for parameter " << p_name;
+
+            switch (strategy_.reduce_) {
+              case BuildStrategy::ReduceStrategy::kReduce:
+                CreateReduceOp(&result, g_name, cur_device_id);
+                var_name_on_devices[cur_device_id].emplace(g_name);
+                bcast_var_name_set[cur_device_id].emplace(p_name);
+                cur_device_id = (cur_device_id + 1) % places_.size();
+                break;
+              case BuildStrategy::ReduceStrategy::kAllReduce:
+                if (IsSparseGradient(var_types, g_name)) {
+                  CreateReduceOp(&result, g_name, 0);
+                  CreateBroadcastOp(&result, g_name, 0);
+                } else {
+                  InsertNCCLAllReduceOp(&result, g_name);
+                }
+                break;
+            }
           }
+        } catch (boost::bad_get e) {
         }
       }
     }
@@ -398,11 +413,12 @@ void MultiDevSSAGraphBuilder::CreateSendOp(SSAGraph *result,
 }
 
 bool MultiDevSSAGraphBuilder::IsScaleLossOp(const OpDesc &op) const {
-  // FIXME(yy): Do not hard code like this
-  return op.OutputArgumentNames().size() == 1 &&
-         op.OutputArgumentNames()[0] == GradVarName(loss_var_name_);
+  return boost::get<int>(
+             op.GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) ==
+             (static_cast<int>(OpRole::kBackward) |
+              static_cast<int>(OpRole::kLoss)) &&
+         !loss_var_name_.empty();  // If loss_var is empty, this is test mode.
 }
-
 }  // namespace details
 }  // namespace framework
 }  // namespace paddle
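The new branch above consumes `op_role_var` as a flat list of alternating parameter and gradient names. A minimal Python sketch of that layout and of the pairwise walk the C++ loop performs (all variable names below are made up for illustration, not taken from the patch):

# Hypothetical value of an op's "op_role_var" attribute: [p0, g0, p1, g1, ...]
backward_vars = ["fc_0.w_0", "fc_0.w_0@GRAD", "fc_0.b_0", "fc_0.b_0@GRAD"]

assert len(backward_vars) % 2 == 0  # mirrors PADDLE_ENFORCE_EQ(size % 2, 0)
param_to_grad = {}
for i in range(0, len(backward_vars), 2):
    # The C++ loop reduces/broadcasts g_name and records p_name per device.
    p_name, g_name = backward_vars[i], backward_vars[i + 1]
    param_to_grad[p_name] = g_name

assert param_to_grad["fc_0.w_0"] == "fc_0.w_0@GRAD"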
diff --git a/paddle/fluid/framework/details/op_registry.h b/paddle/fluid/framework/details/op_registry.h
index 1c4b059cd0aeff803ca7436d3f198e97a06cd012..eea7e712f8f6e187cdceedce77cc76d1d4ca2101 100644
--- a/paddle/fluid/framework/details/op_registry.h
+++ b/paddle/fluid/framework/details/op_registry.h
@@ -96,10 +96,7 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
     info->proto_ = new proto::OpProto;
     info->checker_ = new OpAttrChecker();
     T maker;
-    maker.SetProto(info->proto_);
-    maker.SetChecker(info->checker_);
-    maker.Make();
-    maker.Validate();
+    maker(info->proto_, info->checker_);
     info->proto_->set_type(op_type);
     PADDLE_ENFORCE(
         info->proto_->IsInitialized(),
diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 076c45713015797f86a3611dd333132bae40044d..1b9c685866763ed126a1bf5d7fdd851c38ac1c63 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -20,6 +20,7 @@ limitations under the License. */
 #include <unordered_map>
 #include "glog/logging.h"
 #include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/shape_inference.h"
@@ -222,6 +223,15 @@ Attribute OpDesc::GetAttr(const std::string &name) const {
   return it->second;
 }
 
+Attribute OpDesc::GetNullableAttr(const std::string &name) const {
+  auto it = attrs_.find(name);
+  if (it != attrs_.end()) {
+    return it->second;
+  } else {
+    return Attribute();
+  }
+}
+
 int OpDesc::GetBlockAttr(const std::string &name) const {
   auto it = attrs_.find(name);
   PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
@@ -249,6 +259,13 @@ void OpDesc::RenameOutput(const std::string &old_name,
     std::replace(output.second.begin(), output.second.end(), old_name,
                  new_name);
   }
+
+  auto it = attrs_.find(framework::OpProtoAndCheckerMaker::OpRoleVarAttrName());
+  if (it != attrs_.end()) {
+    auto &op_vars = boost::get<std::vector<std::string>>(it->second);
+    std::replace(op_vars.begin(), op_vars.end(), old_name, new_name);
+  }
+
   need_update_ = true;
 }
 
diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h
index 3ee36a47c156da67a9ff70852665fbbd464bea17..1a330db7cc5555a939950043ac90a321573b292d 100644
--- a/paddle/fluid/framework/op_desc.h
+++ b/paddle/fluid/framework/op_desc.h
@@ -78,6 +78,8 @@ class OpDesc {
 
   Attribute GetAttr(const std::string &name) const;
 
+  Attribute GetNullableAttr(const std::string &name) const;
+
   int GetBlockAttr(const std::string &name) const;
 
   void Rename(const std::string &old_name, const std::string &new_name);
diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc
index c479d7617cfa34cd381d84d15d5e214d57af52d0..5a4380a83a2e5bf492098032cd9de7bf274fe47e 100644
--- a/paddle/fluid/framework/op_proto_maker.cc
+++ b/paddle/fluid/framework/op_proto_maker.cc
@@ -13,6 +13,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_proto_maker.h"
 #include <string>
+#include <vector>
 
 namespace paddle {
 namespace framework {
@@ -55,5 +56,28 @@ void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() {
   }
 }
 
+void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
+                                        OpAttrChecker* attr_checker) {
+  proto_ = proto;
+  op_checker_ = attr_checker;
+  Make();
+
+  AddAttr<int>(OpRoleAttrName(), "The role of this operator")
+      .InEnum(
+          {static_cast<int>(OpRole::kForward),
+           static_cast<int>(OpRole::kBackward),
+           static_cast<int>(OpRole::kOptimize),
+           static_cast<int>(OpRole::kLoss) | static_cast<int>(OpRole::kForward),
+           static_cast<int>(OpRole::kLoss) |
+               static_cast<int>(OpRole::kBackward),
+           static_cast<int>(OpRole::kNotSpecified)})
+      .SetDefault(static_cast<int>(OpRole::kNotSpecified));
+  AddAttr<std::vector<std::string>>(OpRoleVarAttrName(),
+                                    "Optimized for variable")
+      .SetDefault({});
+
+  Validate();
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h
index b01a520bba19c1be32363a1a5c381666c82e6afc..9bd6ca6ea32734707a5c37b3ecfe449436c04c8c 100644
--- a/paddle/fluid/framework/op_proto_maker.h
+++ b/paddle/fluid/framework/op_proto_maker.h
@@ -20,21 +20,31 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
+enum class OpRole {
+  kForward = 0x0000,
+  kBackward = 0x0001,
+  kOptimize = 0x0002,
+
+  kLoss = 0x0100,
+  // The default value of an op's role. It should only be used for unit tests
+  // and for CreateOp inside an operator.
+  kNotSpecified = 0x1000,
+};
+
 // this class not only make proto but also init attribute checkers.
 class OpProtoAndCheckerMaker {
  public:
+  static const char *OpRoleAttrName() { return "op_role"; }
+  static const char *OpRoleVarAttrName() { return "op_role_var"; }
+
+  void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
+
   virtual void Make() = 0;
 
   virtual ~OpProtoAndCheckerMaker() {
     CHECK(validated_) << "should call Validate after build";
   }
 
-  void SetProto(proto::OpProto *proto) { proto_ = proto; }
-
-  void SetChecker(OpAttrChecker *attr_checker) { op_checker_ = attr_checker; }
-
-  void Validate();
-
  protected:
   struct VariableBuilder {
     proto::OpProto::Var *var_;
@@ -76,6 +86,7 @@ class OpProtoAndCheckerMaker {
 
  private:
   void CheckNoDuplicatedInOutAttrs();
+  void Validate();
 
   proto::OpProto *proto_;
   OpAttrChecker *op_checker_;
diff --git a/paddle/fluid/framework/op_proto_maker_test.cc b/paddle/fluid/framework/op_proto_maker_test.cc
index 9b5badbc81f9ddf083c81f57f5355e07a8e5e4a2..a8030d377fdb4d4aef74b315e21792dad10fac96 100644
--- a/paddle/fluid/framework/op_proto_maker_test.cc
+++ b/paddle/fluid/framework/op_proto_maker_test.cc
@@ -28,10 +28,8 @@ TEST(ProtoMaker, DuplicatedAttr) {
   paddle::framework::proto::OpProto op_proto;
   paddle::framework::OpAttrChecker op_checker;
   TestAttrProtoMaker proto_maker;
-  proto_maker.SetProto(&op_proto);
-  proto_maker.SetChecker(&op_checker);
-  proto_maker.Make();
-  ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet);
+  ASSERT_THROW(proto_maker(&op_proto, &op_checker),
+               paddle::platform::EnforceNotMet);
 }
 
 class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
@@ -46,8 +44,6 @@ TEST(ProtoMaker, DuplicatedInOut) {
   paddle::framework::proto::OpProto op_proto;
   paddle::framework::OpAttrChecker op_checker;
   TestAttrProtoMaker proto_maker;
-  proto_maker.SetProto(&op_proto);
-  proto_maker.SetChecker(&op_checker);
-  proto_maker.Make();
-  ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet);
+  ASSERT_THROW(proto_maker(&op_proto, &op_checker),
+               paddle::platform::EnforceNotMet);
 }
diff --git a/paddle/fluid/pybind/const_value.cc b/paddle/fluid/pybind/const_value.cc
index 3f28e616494ad1322708ad6403aaf50b22d724e6..9111abca5aac97e9d5c7b00ce5173f08e49cda12 100644
--- a/paddle/fluid/pybind/const_value.cc
+++ b/paddle/fluid/pybind/const_value.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/pybind/const_value.h"
+#include <string>
 #include "paddle/fluid/framework/operator.h"
 
 namespace paddle {
@@ -23,6 +24,21 @@ void BindConstValue(pybind11::module* m) {
   m->def("kTempVarName", [] { return framework::kTempVarName; });
   m->def("kGradVarSuffix", [] { return framework::kGradVarSuffix; });
   m->def("kZeroVarSuffix", [] { return framework::kZeroVarSuffix; });
+
+  auto op_proto_and_checker_maker =
+      m->def_submodule("op_proto_and_checker_maker");
+
+  pybind11::enum_<framework::OpRole>(op_proto_and_checker_maker, "OpRole")
+      .value("Forward", framework::OpRole::kForward)
+      .value("Backward", framework::OpRole::kBackward)
+      .value("Optimize", framework::OpRole::kOptimize)
+      .value("Loss", framework::OpRole::kLoss);
+
+  op_proto_and_checker_maker.def(
+      "kOpRoleAttrName", framework::OpProtoAndCheckerMaker::OpRoleAttrName);
+  op_proto_and_checker_maker.def(
+      "kOpRoleVarAttrName",
+      framework::OpProtoAndCheckerMaker::OpRoleVarAttrName);
 }
 
 }  // namespace pybind
diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index 32b1b65bd97ef1e512a5880843509611b606f52d..4f9622d04dc98f41b503ceb780802d2a4e4c58a0 100644
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -51,6 +51,12 @@ def _create_op_desc_(op_type, inputs, outputs, attrs):
         op_desc.set_input(para, args)
     for para, args in outputs.iteritems():
         op_desc.set_output(para, args)
+
+    op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
+
+    if op_role_attr_name not in attrs:
+        attrs[
+            op_role_attr_name] = core.op_proto_and_checker_maker.OpRole.Backward
     for name, val in attrs.iteritems():
         if isinstance(val, framework.Block):
             op_desc.set_block_attr(name, val.desc)
@@ -141,7 +147,7 @@ def _addup_repetitive_outputs_(op_descs):
             else:
                 if len(renamed_vars[var_name]) == 1:
                     new_name = var_name + "@RENAME@" + \
-                        str(var_rename_count[var_name])
+                               str(var_rename_count[var_name])
                     var_rename_count[var_name] += 1
                     # rename original var_name
                     renamed_vars[var_name][0] = new_name
@@ -149,7 +155,7 @@ def _addup_repetitive_outputs_(op_descs):
                     _rename_arg_(pending_sum_ops, var_name, new_name)
 
                 new_name = var_name + "@RENAME@" + \
-                    str(var_rename_count[var_name])
+                           str(var_rename_count[var_name])
                 var_rename_count[var_name] += 1
                 op_desc.rename_output(var_name, new_name)
                 renamed_vars[var_name].append(new_name)
@@ -335,9 +341,12 @@ def _append_backward_ops_(block,
                            no_grad_dict[block.idx])
 
     # append op_desc in grad_op_descs to target_block
+    op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
+    backward = core.op_proto_and_checker_maker.OpRole.Backward
     for op_desc in grad_op_descs:
         new_op_desc = target_block.desc.append_op()
         new_op_desc.copy_from(op_desc)
+        new_op_desc.set_attr(op_role_attr_name, backward)
         grad_to_var["__current_op_desc__"] = new_op_desc
         if callbacks is not None:
             assert (isinstance(callbacks, list))
@@ -439,6 +448,22 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
         (list[(Variable,Variable)]): list of (parameter, gradient) pair.
     """
     assert isinstance(loss, framework.Variable)
+
+    if loss.op is None:
+        # The loss comes from a cloned program. Find the loss op manually.
+        for op in reversed(loss.block.ops):
+            assert isinstance(op, framework.Operator)
+            if len(op.output_arg_names) == 1 and op.output_arg_names[
+                    0] == loss.name:
+                loss.op = op
+                break
+        if loss.op is None:
+            raise ValueError("loss.op is None. Should not happen")
+
+    loss.op.set_attr(core.op_proto_and_checker_maker.kOpRoleAttrName(),
+                     int(core.op_proto_and_checker_maker.OpRole.Forward) |
+                     int(core.op_proto_and_checker_maker.OpRole.Loss))
+
     if callbacks is not None:
         isinstance(callbacks, list)
 
@@ -456,12 +481,16 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
     current_block_idx = program.current_block_idx
     grad_to_var = dict()
 
-    op_desc = _create_op_desc_("fill_constant", {}, {
-        "Out": [_append_grad_suffix_(loss.name)]
-    }, {"shape": [1],
-        "value": 1.0,
-        "dtype": loss.dtype,
-        "force_cpu": False})
+    op_desc = _create_op_desc_(
+        "fill_constant", {}, {"Out": [_append_grad_suffix_(loss.name)]}, {
+            "shape": [1],
+            "value": 1.0,
+            "dtype": loss.dtype,
+            "force_cpu": False,
+            core.op_proto_and_checker_maker.kOpRoleAttrName():
+            int(core.op_proto_and_checker_maker.OpRole.Backward) |
+            int(core.op_proto_and_checker_maker.OpRole.Loss),
+        })
     root_block.desc.append_op().copy_from(op_desc)
 
     block_no_grad_set = set(map(_strip_grad_suffix_, no_grad_dict[0]))
@@ -505,6 +534,24 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
             params_and_grads.append((param_var, grad_var))
         else:
             params_and_grads.append((param_var, None))
+
+    op_role_var_attr_name = core.op_proto_and_checker_maker.kOpRoleVarAttrName()
+    for p, g in params_and_grads:
+        if g is None:
+            continue
+        for op in reversed(program.global_block().ops):
+            assert isinstance(op, framework.Operator)
+            if g.name in op.output_arg_names:
+                g.op = op
+                break
+
+        if g.op is None:
+            raise ValueError("Unexpected branch")
+        attr_val = [p.name, g.name]
+        if g.op.has_attr(op_role_var_attr_name):
+            attr_val.extend(g.op.attr(op_role_var_attr_name))
+        g.op.set_attr(op_role_var_attr_name, attr_val)
+
     return params_and_grads
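A hedged sketch of what these backward.py changes produce at the Python level, assuming the `core.op_proto_and_checker_maker` bindings added above; the toy network and variable names are illustrative, not taken from the patch:

import paddle.fluid as fluid
from paddle.fluid import core

maker = core.op_proto_and_checker_maker

prog = fluid.Program()
with fluid.program_guard(prog):
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    cost = fluid.layers.square_error_cost(
        input=fluid.layers.fc(input=x, size=1), label=y)
    avg_cost = fluid.layers.mean(cost)
    params_and_grads = fluid.backward.append_backward(avg_cost)

# The loss op is tagged Forward | Loss; each gradient op records the
# (parameter, gradient) names it produces in op_role_var.
loss_role = int(maker.OpRole.Forward) | int(maker.OpRole.Loss)
assert avg_cost.op.attr(maker.kOpRoleAttrName()) == loss_role
for p, g in params_and_grads:
    if g is not None:
        assert p.name in g.op.attr(maker.kOpRoleVarAttrName())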
diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index 12add9e686910c3936cf17fe87a5d0b78443b270..66c3fc6b66d61bc9578f84594409ad0f24c99910 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -214,21 +214,24 @@ def set_gradient_clip(clip, param_list=None, program=None):
 
 def append_gradient_clip_ops(param_grad):
     context = dict()
-    create_op_callbacks = []
     for p, g in param_grad:
-        clip_attr = getattr(p, 'gradient_clip_attr', NullGradientClipAttr())
-        if clip_attr is None:
-            clip_attr = NullGradientClipAttr()
-        if not isinstance(clip_attr, BaseGradientClipAttr):
-            raise TypeError(
-                "clip attribute should be an instance of BaseGradientClipAttr")
+        with p.block.program.optimized_guard(p):
+            clip_attr = getattr(p, 'gradient_clip_attr', NullGradientClipAttr())
+            if clip_attr is None:
+                clip_attr = NullGradientClipAttr()
+            if not isinstance(clip_attr, BaseGradientClipAttr):
+                raise TypeError(
+                    "clip attribute should be an instance of BaseGradientClipAttr"
+                )
 
-        clip_attr.process_context(context=context, param=p, grad=g)
-        create_op_callbacks.append(
-            functools.partial(
-                clip_attr.create_operators, param=p, grad=g))
+            clip_attr.process_context(context=context, param=p, grad=g)
+
+    res = []
+    for p, g in param_grad:
+        with p.block.program.optimized_guard(p):
+            res.append(clip_attr.create_operators(param=p, grad=g))
 
-    return [each_callback() for each_callback in create_op_callbacks]
+    return res
 
 
 ClipByValue = GradientClipByValue
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 161ea55586bbb6bde2cbb0084bb67b184f91460e..08b756d95b9b72db5d978afbe437bbfcb52025b0 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -404,6 +404,23 @@ class Operator(object):
         self.block = block
         self.desc = desc
         self.attrs = attrs
+        if self.attrs is None:
+            self.attrs = dict()
+        del attrs
+
+        op_maker = core.op_proto_and_checker_maker
+
+        if op_maker.kOpRoleAttrName() not in self.attrs:
+            self.attrs[op_maker.kOpRoleAttrName()] = self.block.program.op_role
+
+        role_var_name = op_maker.kOpRoleVarAttrName()
+        if len(self.block.program.
+               op_role_var) != 0 and role_var_name not in self.attrs:
+            self.attrs[role_var_name] = self.block.program.op_role_var
+
+        if role_var_name in self.attrs and len(self.attrs[role_var_name]) == 0:
+            del self.attrs[role_var_name]
+
         if len(self.desc.type()) != 0:
             return
         if type is None:
@@ -469,22 +486,23 @@ class Operator(object):
                             arg.op = self
                 self.desc.set_output(out_proto.name, out_arg_names)
 
-        if attrs is not None:
-            if not isinstance(attrs, dict):
+        if self.attrs is not None:
+            if not isinstance(self.attrs, dict):
                 raise TypeError("'attrs' should be a dict.")
             for attr in proto.attrs:
                 attr_name = attr.name
-                if (attr_name not in attrs) or (attrs[attr_name] is None):
+                if (attr_name not in self.attrs) or (
+                        self.attrs[attr_name] is None):
                     continue
-                if isinstance(attrs[attr_name], Block):
-                    self.desc.set_block_attr(attr_name, attrs[attr_name].desc)
-                elif isinstance(attrs[attr_name], core.BlockDesc) or \
-                        isinstance(attrs[attr_name], core.ProgramDesc):
+                if isinstance(self.attrs[attr_name], Block):
+                    self.desc.set_block_attr(attr_name,
+                                             self.attrs[attr_name].desc)
+                elif isinstance(self.attrs[attr_name], core.BlockDesc) or \
+                        isinstance(self.attrs[attr_name], core.ProgramDesc):
                     self.desc.set_serialized_attr(
-                        attr_name, attrs[attr_name].serialize_to_string())
+                        attr_name, self.attrs[attr_name].serialize_to_string())
                 else:
-                    self.desc.set_attr(attr_name, attrs[attr_name])
-
+                    self.desc.set_attr(attr_name, self.attrs[attr_name])
         self.desc.check_attrs()
         no_kernel_op_set = {
             'feed', 'fetch', 'save', 'load', 'recurrent', 'go',
@@ -612,6 +630,10 @@ class Operator(object):
         """
         return self.desc.attr_type(name)
 
+    def set_attr(self, name, val):
+        self.attrs[name] = val
+        self.desc.set_attr(name, val)
+
     @property
     def attr_names(self):
         """
@@ -1002,6 +1024,33 @@ class Program(object):
         self.blocks = [Block(self, 0)]
         self.current_block_idx = 0
         self._seed = 0
+        self._current_role = core.op_proto_and_checker_maker.OpRole.Forward
+        self._op_role_var = []
+
+    @property
+    def op_role(self):
+        return self._current_role
+
+    @op_role.setter
+    def set_op_role(self, role):
+        self._current_role = role
+
+    @property
+    def op_role_var(self):
+        return self._op_role_var
+
+    @op_role_var.setter
+    def set_op_role_var(self, var_name):
+        self._op_role_var = [var_name]
+
+    @contextlib.contextmanager
+    def optimized_guard(self, var):
+        OpRole = core.op_proto_and_checker_maker.OpRole
+        self._current_role = OpRole.Optimize
+        self._op_role_var = [var.name if isinstance(var, Variable) else var]
+        yield
+        self._op_role_var = []
+        self._current_role = OpRole.Forward
 
     def __str__(self):
         return self.to_string(True)
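A minimal usage sketch of the new `Program.optimized_guard`, which the clip.py change above and the optimizer.py / regularizer.py changes below wrap around their per-parameter loops; the parameter name is illustrative:

import paddle.fluid as fluid
from paddle.fluid import core

OpRole = core.op_proto_and_checker_maker.OpRole

prog = fluid.Program()
with fluid.program_guard(prog):
    w = fluid.layers.create_parameter(shape=[4, 4], dtype='float32', name='w')

with prog.optimized_guard(w):
    # Ops appended here default to the Optimize role and record the
    # parameter they touch in op_role_var.
    assert prog.op_role == OpRole.Optimize
    assert prog.op_role_var == [w.name]

# Leaving the guard restores the Forward role and clears op_role_var.
assert prog.op_role == OpRole.Forward
assert prog.op_role_var == []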
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index a0deaca78eed75e9faee7bb38493afc181eb212f..115362c6bf33018342699a442c688e7356f3c206 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -213,11 +213,13 @@ class Optimizer(object):
 
         optimize_ops = []
         for param_and_grad in parameters_and_grads:
-            if param_and_grad[0].trainable is True and param_and_grad[
-                    1] is not None:
-                optimize_op = self._append_optimize_op(loss.block,
-                                                       param_and_grad)
-                optimize_ops.append(optimize_op)
+            with param_and_grad[0].block.program.optimized_guard(
+                    param_and_grad[0]):
+                if param_and_grad[0].trainable is True and param_and_grad[
+                        1] is not None:
+                    optimize_op = self._append_optimize_op(loss.block,
+                                                           param_and_grad)
+                    optimize_ops.append(optimize_op)
 
         # Get custom finish ops for subclasses
         # FIXME: Need to fix this once we figure out how to handle dependencies
diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py
index c006bd9a66ddb422b7d80d2ca87aa7f56a6485db..c4d6829599616cb3ea7791a189e7070974de6ae3 100644
--- a/python/paddle/fluid/regularizer.py
+++ b/python/paddle/fluid/regularizer.py
@@ -43,31 +43,32 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
     """
     params_and_grads = []
     for param, grad in parameters_and_grads:
-        # If no gradient then we don't need to do anything
-        if grad is None:
+        with param.block.program.optimized_guard(param):
+            # If no gradient then we don't need to do anything
+            if grad is None:
+                params_and_grads.append((param, grad))
+                continue
+
+            regularization_term = None
+            if param.regularizer is not None:
+                # Add variable for regularization term in grad block
+                regularization_term = param.regularizer(param, grad, grad.block)
+            elif regularization is not None:
+                regularization_term = regularization(param, grad, grad.block)
+
+            # If no regularization specified, then we don't need to do anything
+            if regularization_term is None:
+                params_and_grads.append((param, grad))
+                continue
+
+            assert grad.shape == regularization_term.shape
+
+            grad.block.append_op(
+                type='elementwise_add',
+                inputs={"X": grad,
+                        "Y": regularization_term},
+                outputs={"Out": grad})
             params_and_grads.append((param, grad))
-            continue
-
-        regularization_term = None
-        if param.regularizer is not None:
-            # Add variable for regularization term in grad block
-            regularization_term = param.regularizer(param, grad, grad.block)
-        elif regularization is not None:
-            regularization_term = regularization(param, grad, grad.block)
-
-        # If no regularization specified, then we don't need to do anything
-        if regularization_term is None:
-            params_and_grads.append((param, grad))
-            continue
-
-        assert grad.shape == regularization_term.shape
-
-        grad.block.append_op(
-            type='elementwise_add',
-            inputs={"X": grad,
-                    "Y": regularization_term},
-            outputs={"Out": grad})
-        params_and_grads.append((param, grad))
 
     return params_and_grads
diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py
index 299ab8e51f017e1980a8b40e3830fc42b1ff7ccc..709b4bf2fcfb180c747ba3539711a58a57e3b77f 100644
--- a/python/paddle/fluid/tests/unittests/op_test.py
+++ b/python/paddle/fluid/tests/unittests/op_test.py
@@ -36,6 +36,12 @@ def randomize_probability(batch_size, class_num, dtype='float32'):
 
 def create_op(scope, op_type, inputs, outputs, attrs):
     kwargs = dict()
+    op_maker = core.op_proto_and_checker_maker
+    op_role_attr_name = op_maker.kOpRoleAttrName()
+
+    if op_role_attr_name not in attrs:
+        attrs[op_role_attr_name] = int(op_maker.OpRole.Forward)
+
     def __create_var__(name, var_name):
         scope.var(var_name).get_tensor()
         kwargs[name].append(var_name)
diff --git a/python/paddle/fluid/tests/unittests/test_operator_desc.py b/python/paddle/fluid/tests/unittests/test_operator_desc.py
index 779ae388f04496a7be9a6d5aa4e39b8245022925..8b15aa6822aee7bb4d53dcf1d87565fae5504821 100644
--- a/python/paddle/fluid/tests/unittests/test_operator_desc.py
+++ b/python/paddle/fluid/tests/unittests/test_operator_desc.py
@@ -63,7 +63,10 @@ class TestOperator(unittest.TestCase):
         self.assertEqual(mul_op.output("Out"), ["mul.out"])
         self.assertEqual(
             set(mul_op.attr_names),
-            set(["x_num_col_dims", "y_num_col_dims", "use_mkldnn"]))
+            set([
+                "x_num_col_dims", "y_num_col_dims", "use_mkldnn", "op_role",
+                "op_role_var"
+            ]))
         self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
        self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)
         self.assertEqual(mul_op.attr("x_num_col_dims"), 1)
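Because every operator now carries `op_role` by default (and `op_role_var` when it is set), attribute-name assertions like the one above must list both. A short hedged sketch of observing the default from Python; the program and layer below are illustrative:

import paddle.fluid as fluid
from paddle.fluid import core

maker = core.op_proto_and_checker_maker

prog = fluid.Program()
with fluid.program_guard(prog):
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    fluid.layers.fc(input=x, size=1)

for op in prog.global_block().ops:
    # op_role is now a default attribute on every operator.
    assert op.has_attr(maker.kOpRoleAttrName())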