diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc
index 216fb66c034a0980b68641004077e4d50b19983c..e4b0aa21a03d279ceb074f03b51afe6c2d996fe2 100644
--- a/paddle/fluid/framework/details/build_strategy.cc
+++ b/paddle/fluid/framework/details/build_strategy.cc
@@ -43,6 +43,12 @@ static inline bool SeqOnlyAllReduceOps(const BuildStrategy &strategy) {
          !strategy.enable_parallel_graph_;
 }
 
+static inline void ConvertDefaultValue(boost::optional<bool> *default_value) {
+  if (*default_value == boost::none) {
+    *default_value = true;
+  }
+}
+
 class ParallelExecutorPassBuilder : public ir::PassBuilder {
  public:
   explicit ParallelExecutorPassBuilder(const BuildStrategy &strategy)
@@ -79,39 +85,55 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
   void ResolveOptionConfliction() {
     // Specifies the restrictions between different pass.
     if (strategy_.enable_parallel_graph_) {
-      VLOG_IF(3, strategy_.fuse_all_optimizer_ops_)
+      LOG_IF(WARNING, strategy_.fuse_all_optimizer_ops_ == true)
           << "Currently, fuse_all_optimizer_ops doesn't work under "
              "parallel_graph.";
       strategy_.fuse_all_optimizer_ops_ = false;
-      VLOG_IF(3, strategy_.fuse_all_reduce_ops_)
+      LOG_IF(WARNING, strategy_.fuse_all_reduce_ops_ == true)
           << "fuse_all_reduce_ops doesn't work under "
              "parallel_graph.";
       strategy_.fuse_all_reduce_ops_ = false;
     }
     if (strategy_.is_distribution_) {
-      VLOG_IF(3, strategy_.fuse_all_optimizer_ops_)
+      LOG_IF(WARNING, strategy_.fuse_all_optimizer_ops_ == true)
           << "Currently, fuse_all_optimizer_ops only works under "
              "Non-distributed mode.";
       strategy_.fuse_all_optimizer_ops_ = false;
-      VLOG_IF(3, strategy_.fuse_all_reduce_ops_)
+      LOG_IF(WARNING, strategy_.fuse_all_reduce_ops_ == true)
           << "Currently, fuse_all_reduce_ops_ only works under "
              "Non-distributed mode.";
       strategy_.fuse_all_reduce_ops_ = false;
     }
     if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce) {
-      VLOG_IF(3, strategy_.fuse_all_optimizer_ops_)
+      LOG_IF(WARNING, strategy_.fuse_all_optimizer_ops_ == true)
           << "Currently, fuse_all_optimizer_ops only works under AllReduce "
              "mode.";
       strategy_.fuse_all_optimizer_ops_ = false;
-      VLOG_IF(3, strategy_.fuse_all_reduce_ops_)
+      LOG_IF(WARNING, strategy_.fuse_all_reduce_ops_ == true)
           << "fuse_all_optimizer_ops only work in Reducer mode.";
       strategy_.fuse_all_reduce_ops_ = false;
     }
-    if (strategy_.async_mode_) {
-      VLOG_IF(3, strategy_.fuse_all_optimizer_ops_)
+    if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) {
+      LOG_IF(WARNING, strategy_.fuse_broadcast_ops_ == true)
+          << "Currently, fuse_broadcast_ops only works under Reduce "
+             "mode.";
+      strategy_.fuse_broadcast_ops_ = false;
+    }
+
+    ConvertDefaultValue(&strategy_.fuse_all_optimizer_ops_);
+    ConvertDefaultValue(&strategy_.fuse_all_reduce_ops_);
+    ConvertDefaultValue(&strategy_.fuse_broadcast_ops_);
+
+    if (strategy_.fuse_all_optimizer_ops_ == true) {
+      LOG_IF(WARNING, strategy_.async_mode_)
           << "Currently, fuse_all_optimizer_ops doesn't work under "
              "async mode.";
-      strategy_.fuse_all_optimizer_ops_ = false;
+      strategy_.fuse_all_optimizer_ops_ = !strategy_.async_mode_;
+    }
+    if (strategy_.fuse_all_reduce_ops_ == true) {
+      LOG_IF(WARNING, strategy_.async_mode_)
+          << "fuse_all_reduce_ops doesn't work under async mode.";
+      strategy_.fuse_all_reduce_ops_ = !strategy_.async_mode_;
     }
   }
@@ -151,7 +173,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
     // NOTE: fuse_all_xx_ops will count the number of xx operator first,
     // if the number is zero, fuse_all_reduce_ops will do nothing.
     // Currently, only one type of optimization algorithm can be fused.
-    if (strategy_.fuse_all_optimizer_ops_) {
+    if (strategy_.fuse_all_optimizer_ops_ == true) {
       AppendPass("fuse_adam_op_pass");
       AppendPass("fuse_sgd_op_pass");
       AppendPass("fuse_momentum_op_pass");
@@ -207,6 +229,11 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
     }
   }
 
+  void AppendPassWithCheck(const boost::optional<bool> &append_pass,
+                           const std::string &pass_name) {
+    AppendPassWithCheck(append_pass == true, pass_name);
+  }
+
   void AppendPassWithCheck(bool append_pass, const std::string &pass_name) {
     if (append_pass) {
       AppendPass(pass_name);
diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h
index 47409b89bcfe81b814af4fc59c65668aa4f3804a..5f0cc4b215c0f52de1ff529db41bc4aaae89c696 100644
--- a/paddle/fluid/framework/details/build_strategy.h
+++ b/paddle/fluid/framework/details/build_strategy.h
@@ -89,8 +89,8 @@ struct BuildStrategy {
   bool fuse_elewise_add_act_ops_{false};
   // Fuse_all_optimizer_ops and fuse_all_reduce_ops require that gradients
   // should not be sparse types
-  bool fuse_all_optimizer_ops_{true};
-  bool fuse_all_reduce_ops_{false};
+  boost::optional<bool> fuse_all_optimizer_ops_{boost::none};
+  boost::optional<bool> fuse_all_reduce_ops_{boost::none};
   // fuse_relu_depthwise_conv can fuse the `relu ->
   // depthwise_conv`
   bool fuse_relu_depthwise_conv_{false};
@@ -98,7 +98,7 @@ struct BuildStrategy {
   // faster. Because fusing broadcast OP equals delaying the execution of all
   // broadcast Ops, in this case, all nccl streams are used only for reduce
   // operations for a period of time.
-  bool fuse_broadcast_ops_{true};
+  boost::optional<bool> fuse_broadcast_ops_{boost::none};
   // replace batch_norm with sync_batch_norm.
   bool sync_batch_norm_{false};
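The two files above implement a tri-state default: each fuse flag starts as boost::none ("unset"), the conflict checks may force it to false, and ConvertDefaultValue promotes a still-unset flag to true only after every veto has run. Below is a minimal standalone sketch of that pattern, using boost::optional the same way the patch does; the main() harness and flag names are illustrative only, not Paddle code.

// Standalone sketch of the tri-state flag pattern; not Paddle code.
// boost::optional is header-only, so this compiles with a plain g++ call.
#include <boost/optional.hpp>
#include <iostream>

static inline void ConvertDefaultValue(boost::optional<bool> *flag) {
  // Promote to the default (true) only if the user never set the flag
  // and no conflict check earlier forced it to false.
  if (*flag == boost::none) {
    *flag = true;
  }
}

int main() {
  boost::optional<bool> fuse_ops{boost::none};  // user left the flag unset

  bool conflicting_mode = false;  // e.g. distributed or parallel_graph mode
  if (conflicting_mode) {
    fuse_ops = false;  // an explicit veto wins over the default
  }
  ConvertDefaultValue(&fuse_ops);

  // An optional<bool> compares equal to a bool only when it is engaged,
  // which is why the patch writes `flag == true` rather than just `flag`.
  std::cout << (fuse_ops == true) << std::endl;  // prints 1
  return 0;
}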
diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc
index 997f18d70999f2a7f3c5e7214c8ddcd027101b4f..8aec098720bb1ed6988da6ef2d213713c539a053 100644
--- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc
+++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc
@@ -124,7 +124,7 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
 
     // NOTE: fused_var is only exist in scope, so the graph doesn't have
     // fused_var node.
-    VLOG(7) << "Insert adam to graph ";
+    VLOG(6) << "Insert adam to graph ";
     OpDesc adam_desc(adam_ops[0]->Op()->Block());
     adam_desc.SetType("adam");
     adam_desc.SetInput(kParam, {fused_vars_name.at(kParam)});
@@ -180,7 +180,7 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
       scale_ops.emplace_back(*scale_op_iter);
     }
     PADDLE_ENFORCE_EQ(scale_ops.size(), beta_name.size());
-    VLOG(7) << "The number of scale op is " << scale_ops.size() << ".";
+    VLOG(6) << "The number of scale op is " << scale_ops.size() << ".";
     // Check attributions
     // NOTE: If new attribution is added, the following code maybe need change.
     int op_role = boost::get<int>(
@@ -205,7 +205,7 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
 
     // NOTE: fused_var is only exist in scope, so the graph doesn't have
     // fused_var node.
-    VLOG(7) << "Insert fused scale to graph.";
+    VLOG(6) << "Insert fused scale to graph.";
     OpDesc scale_desc(scale_ops[0]->Op()->Block());
     scale_desc.SetType("scale");
     scale_desc.SetInput("X", {fused_var_name});
diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc
index ef958c7364f3adb2e4a0ce669e7151680beacccc..8f3a623a9883b6b4e638da7c39b0d1f9d78c0488 100644
--- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc
+++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc
@@ -61,7 +61,7 @@ class FuseMomentumOpPass : public FuseOptimizerOpPass {
 
     // NOTE: fused_var is only exist in scope, so the graph doesn't have
     // fused_var node.
-    VLOG(7) << "Insert momentum to graph ";
+    VLOG(6) << "Insert momentum to graph ";
     OpDesc momentum_desc(momentum_ops[0]->Op()->Block());
     momentum_desc.SetType("momentum");
     momentum_desc.SetInput(kParam, {fused_vars_name.at(kParam)});
diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
index 74e2f93d9fd66cacc8c6d09b49d5bf727cc8c5de..fcb5604a07c8140e1dbc02634b06d65641e5bf4b 100644
--- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
+++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
@@ -49,7 +49,7 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
   }
 
   VLOG(6) << "Find " << fuse_op_type << " operators : " << opt_ops_num
-          << ", and " << opt_nodes.size() << " for dense gradients ";
+          << ", and " << opt_nodes.size() << " for dense gradients.";
   if (opt_nodes.size() == 0 || result.Has(details::kFusedOptType)) {
     if (result.Has(details::kFusedOptType)) {
       auto &opt_type =
@@ -69,6 +69,11 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
     return;
   }
 
+  LOG(WARNING) << "Find " << fuse_op_type << " operators : " << opt_ops_num
+               << ", and " << opt_nodes.size() << " for dense gradients. "
+               << "To make the speed faster, those optimization ops are "
+                  "fused during training.";
+
   result.Set(details::kFusedOptType, new details::FusedOptType);
   result.Get<details::FusedOptType>(details::kFusedOptType) = fuse_op_type;
   if (!result.Has(details::kProgramDescs)) {
@@ -149,7 +154,7 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
                          &opt_nodes);
     grad_fused = true;
   } else {
-    VLOG(10) << "The number of new gradients is " << new_grad_idx.size();
+    VLOG(6) << "The number of new gradients is " << new_grad_idx.size();
    if (new_grad_idx.size() == 1) return;
     // NOTE(zcd): If the gradients of backward stage and optimization stage
     // have diff, Only take care of the the gradient of optimization stage.
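Most of the hunks above retune logging rather than behavior: detailed pass diagnostics move from the mixed VLOG(7)/VLOG(10) levels to a single VLOG(6), while fusion decisions the user should notice are promoted to LOG(WARNING). A short standalone glog sketch of the distinction being relied on (not Paddle code):

// Standalone sketch of the glog levels the patch relies on; not Paddle code.
#include <glog/logging.h>

int main(int argc, char *argv[]) {
  google::InitGoogleLogging(argv[0]);
  FLAGS_logtostderr = true;  // print to stderr instead of log files
  FLAGS_v = 6;               // same effect as running with --v=6

  // LOG(WARNING) is always emitted, whatever the verbosity. The patch
  // promotes user-facing fusion decisions to this level.
  LOG(WARNING) << "fuse_all_optimizer_ops is enabled";

  // VLOG(6) is emitted only when the verbosity is >= 6. The patch settles
  // on level 6 for pass diagnostics instead of mixing VLOG(7) and VLOG(10).
  VLOG(6) << "Insert adam to graph";

  // LOG_IF logs only when the condition holds; ResolveOptionConfliction
  // uses this to warn exactly when a user-set flag conflicts with the mode.
  bool flag_conflicts = true;
  LOG_IF(WARNING, flag_conflicts) << "flag conflicts with the current mode";
  return 0;
}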
diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc
index b202a6506d93a3e16dfb7474f46431dd91fc1f61..3dd54cbc3c983e26072c09d6af48688965098611 100644
--- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc
+++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc
@@ -42,7 +42,7 @@ class FuseSgdOpPass : public FuseOptimizerOpPass {
     int op_role = boost::get<int>(
         sgd_ops[0]->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName()));
 
-    VLOG(7) << "Insert sgd to graph ";
+    VLOG(6) << "Insert sgd to graph.";
     // Add fused scale
     OpDesc Sgd_desc(sgd_ops[0]->Op()->Block());
     Sgd_desc.SetType("sgd");
diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc
index a7815b71e41feef0376bca6f8d1d9e575d1a1513..73d7bf6dba0f01cc53ed0e9010c2da88afd6e384 100644
--- a/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc
+++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc
@@ -56,7 +56,7 @@ class FuseAllReduceOpPass : public ir::Pass {
     std::unordered_map<std::string, Node *> all_reduce_ops =
         GetAllReduceOps(result, places, grads);
 
-    VLOG(10) << "Find all_reduce_ops: " << all_reduce_ops.size();
+    VLOG(6) << "Find all_reduce_ops: " << all_reduce_ops.size();
     if (all_reduce_ops.size() == 0) {
       return;
     }
@@ -65,11 +65,16 @@ class FuseAllReduceOpPass : public ir::Pass {
                       "The number of all_reduce OpHandle is not equal to the "
                       "number of grads. Maybe some gradients are sparse type, "
                       "it is not supported currently.");
-    VLOG(10) << "Insert fused_all_reduce";
 
     auto &group_params_grads = graph->Get<details::GroupParamsAndGrads>(
         details::kGroupParamsAndDenseGrads);
 
+    LOG(WARNING) << string::Sprintf(
+        "Find all_reduce operators: %d. To make the speed faster, some "
+        "all_reduce ops are fused during training; after fusion, "
+        "the number of all_reduce ops is %d.",
+        all_reduce_ops.size(), group_params_grads.size());
+
     for (auto &group_p_g : group_params_grads) {
       size_t group_size = group_p_g.size();
       PADDLE_ENFORCE_GT(group_size, static_cast<size_t>(0));
diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc
index 304446f82d65b9f276d857771d210db3112bf853..224ab21b4788f99b91e343f06afa55dcb2a69a82 100644
--- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc
+++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc
@@ -699,7 +699,7 @@ bool ReduceSSAGraphBuilder::DealWithSpecialOp(ir::Graph *result,
 
 void ReduceSSAGraphBuilder::InsertPostprocessOps(ir::Graph *result) const {
   if (UseGPU()) {
-    if (strategy_.fuse_broadcast_ops_) {
+    if (strategy_.fuse_broadcast_ops_ == true) {
       CreateFusedBroadcastOp(result, bcast_var_name_set_);
     } else {
       for (size_t dev_id = 0; dev_id < bcast_var_name_set_.size(); ++dev_id) {
@@ -1068,7 +1068,7 @@ void DistSSAGraphBuilder::InsertPostprocessOps(ir::Graph *result) const {
       strategy_.reduce_ == details::BuildStrategy::ReduceStrategy::kReduce) {
     return;
   }
-  if (strategy_.fuse_broadcast_ops_) {
+  if (strategy_.fuse_broadcast_ops_ == true) {
     CreateFusedBroadcastOp(result, bcast_var_name_set_);
   } else {
     for (size_t dev_id = 0; dev_id < bcast_var_name_set_.size(); ++dev_id) {
diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h
index 1f2eed05fecb3ab71a7e52c49169a93627145e0d..ea0455b6a8b16f1bf3370fc75bbf3b7b7f7545a8 100644
--- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h
+++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h
@@ -123,7 +123,7 @@ class AsyncSSAGraphBuilder : public MultiDevSSAGraphBuilderBase {
                          const std::string &g_name) const override {}
 
   bool NeedCollectiveForGrad(const std::string &grad_name,
-                             std::vector<ir::Node *> ops) const {
+                             std::vector<ir::Node *> ops) const override {
     return false;
   }
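Besides the `== true` comparisons forced by the boost::optional change, the header hunk directly above adds `override` to AsyncSSAGraphBuilder::NeedCollectiveForGrad. The keyword turns a silent signature mismatch into a compile error; without it, a drifting parameter type would quietly declare a new overload and the base implementation would keep winning at virtual dispatch. A minimal sketch with hypothetical class names (not the real Paddle builder hierarchy):

// Minimal illustration of why the patch adds `override`; class names are
// hypothetical, not the actual Paddle classes.
#include <iostream>
#include <string>
#include <vector>

struct Node {};

class BuilderBase {
 public:
  virtual ~BuilderBase() = default;
  virtual bool NeedCollectiveForGrad(const std::string &grad_name,
                                     std::vector<Node *> ops) const {
    return true;  // default: gradients need a collective op
  }
};

class AsyncBuilder : public BuilderBase {
 public:
  // With `override`, the compiler rejects this method if its signature
  // drifts from the base class (e.g. a changed parameter type); without
  // it, the mismatch would silently become an unrelated overload.
  bool NeedCollectiveForGrad(const std::string &grad_name,
                             std::vector<Node *> ops) const override {
    return false;  // async mode never inserts collectives
  }
};

int main() {
  AsyncBuilder async;
  const BuilderBase &base = async;
  std::cout << base.NeedCollectiveForGrad("w@GRAD", {}) << std::endl;  // 0
  return 0;
}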
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index ad9501af6b9a32365e8985f16fd8f017f6f290ba..768839ec11bba9e735575b7b6dfe9119fc19fb43 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -338,8 +338,8 @@ PYBIND11_MODULE(core_noavx, m) {
                      recursive_sequence_lengths.end(),
                      std::back_inserter(new_lod));
            LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
-           PADDLE_ENFORCE(
-               CheckLoD(new_offset_lod, -1),
+           PADDLE_ENFORCE_EQ(
+               CheckLoD(new_offset_lod, -1), true,
                "the provided recursive_sequence_lengths info is invalid");
            new (&instance) LoDTensor(new_offset_lod);
          })
@@ -355,8 +355,9 @@ PYBIND11_MODULE(core_noavx, m) {
            LoD new_lod;
            new_lod.reserve(lod.size());
            std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
-           PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()),
-                          "the provided lod info is invalid");
+           PADDLE_ENFORCE_EQ(
+               CheckLoD(new_lod, vectorize(self.dims()).front()), true,
+               "the provided lod info is invalid");
            self.set_lod(new_lod);
          },
          py::arg("lod"), R"DOC(
@@ -386,8 +387,8 @@ PYBIND11_MODULE(core_noavx, m) {
                      recursive_sequence_lengths.end(),
                      std::back_inserter(new_lod));
            LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
-           PADDLE_ENFORCE(
-               CheckLoD(new_offset_lod, vectorize(self.dims()).front()),
+           PADDLE_ENFORCE_EQ(
+               CheckLoD(new_offset_lod, vectorize(self.dims()).front()), true,
                "the provided recursive_sequence_lengths info is invalid");
            self.set_lod(new_offset_lod);
          },
@@ -588,7 +589,7 @@ All parameter, weight, gradient are variables in Paddle.
 #endif
       .def("get_reader",
            [](Variable &self) -> framework::ReaderHolder * {
-             PADDLE_ENFORCE(self.IsType<framework::ReaderHolder>());
+             PADDLE_ENFORCE_EQ(self.IsType<framework::ReaderHolder>(), true);
              return self.GetMutable<framework::ReaderHolder>();
            },
            py::return_value_policy::reference);
@@ -713,8 +714,8 @@ All parameter, weight, gradient are variables in Paddle.
       auto &info = iter.second;
       if (info.HasOpProtoAndChecker()) {
         std::string str;
-        PADDLE_ENFORCE(
-            info.Proto().SerializeToString(&str),
+        PADDLE_ENFORCE_EQ(
+            info.Proto().SerializeToString(&str), true,
             "Serialize OpProto Error. This could be a bug of Paddle.");
         ret_values.emplace_back(str);
       }
@@ -942,16 +943,17 @@ All parameter, weight, gradient are variables in Paddle.
       });
 
   py::class_<OperatorBase>(m, "Operator")
-      .def_static("create",
-                  [](py::bytes protobin) {
-                    proto::OpDesc desc;
-                    PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
-                                   "Cannot parse user input to OpDesc");
-                    PADDLE_ENFORCE(desc.IsInitialized(),
-                                   "User OpDesc is not initialized, reason %s",
-                                   desc.InitializationErrorString());
-                    return OpRegistry::CreateOp(desc);
-                  })
+      .def_static(
+          "create",
+          [](py::bytes protobin) {
+            proto::OpDesc desc;
+            PADDLE_ENFORCE_EQ(desc.ParsePartialFromString(protobin), true,
+                              "Cannot parse user input to OpDesc");
+            PADDLE_ENFORCE_EQ(desc.IsInitialized(), true,
+                              "User OpDesc is not initialized, reason %s",
+                              desc.InitializationErrorString());
+            return OpRegistry::CreateOp(desc);
+          })
       .def("run",
            [](OperatorBase &self, const Scope &scope,
              const platform::CPUPlace &place) { self.Run(scope, place); })
@@ -1323,7 +1325,8 @@ All parameter, weight, gradient are variables in Paddle.
           "reduce_strategy",
           [](const BuildStrategy &self) { return self.reduce_; },
           [](BuildStrategy &self, BuildStrategy::ReduceStrategy strategy) {
-            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
+            PADDLE_ENFORCE_EQ(!self.IsFinalized(), true,
+                              "BuildStrategy is finlaized.");
             self.reduce_ = strategy;
           },
           R"DOC(The type is fluid.BuildStrategy.ReduceStrategy, there are two reduce
@@ -1346,7 +1349,8 @@ All parameter, weight, gradient are variables in Paddle.
           [](const BuildStrategy &self) { return self.gradient_scale_; },
           [](BuildStrategy &self,
              BuildStrategy::GradientScaleStrategy strategy) {
-            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finalized.");
+            PADDLE_ENFORCE_EQ(!self.IsFinalized(), true,
+                              "BuildStrategy is finalized.");
             self.gradient_scale_ = strategy;
           },
           R"DOC(The type is fluid.BuildStrategy.GradientScaleStrategy, there are three
@@ -1407,7 +1411,8 @@ All parameter, weight, gradient are variables in Paddle.
           "debug_graphviz_path",
           [](const BuildStrategy &self) { return self.debug_graphviz_path_; },
           [](BuildStrategy &self, const std::string &path) {
-            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
+            PADDLE_ENFORCE_EQ(!self.IsFinalized(), true,
+                              "BuildStrategy is finlaized.");
             self.debug_graphviz_path_ = path;
           },
           R"DOC(The type is STR, debug_graphviz_path indicates the path that
@@ -1428,7 +1433,8 @@ All parameter, weight, gradient are variables in Paddle.
             return self.enable_sequential_execution_;
           },
           [](BuildStrategy &self, bool b) {
-            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
+            PADDLE_ENFORCE_EQ(!self.IsFinalized(), true,
+                              "BuildStrategy is finlaized.");
             self.enable_sequential_execution_ = b;
           },
           R"DOC(The type is BOOL. If set True, the execution order of ops would
@@ -1447,7 +1453,8 @@ All parameter, weight, gradient are variables in Paddle.
             return self.remove_unnecessary_lock_;
           },
           [](BuildStrategy &self, bool b) {
-            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
+            PADDLE_ENFORCE_EQ(!self.IsFinalized(), true,
+                              "BuildStrategy is finlaized.");
             self.remove_unnecessary_lock_ = b;
           },
           R"DOC(The type is BOOL. If set True, some locks in GPU ops would be
@@ -1508,7 +1515,8 @@ All parameter, weight, gradient are variables in Paddle.
             return self.fuse_elewise_add_act_ops_;
           },
           [](BuildStrategy &self, bool b) {
-            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
+            PADDLE_ENFORCE_EQ(!self.IsFinalized(), true,
+                              "BuildStrategy is finlaized.");
             self.fuse_elewise_add_act_ops_ = b;
           },
           R"DOC(The type is BOOL, fuse_elewise_add_act_ops indicate whether
@@ -1528,7 +1536,8 @@ All parameter, weight, gradient are variables in Paddle.
             return self.fuse_relu_depthwise_conv_;
           },
           [](BuildStrategy &self, bool b) {
-            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
+            PADDLE_ENFORCE_EQ(!self.IsFinalized(), true,
+                              "BuildStrategy is finlaized.");
             self.fuse_relu_depthwise_conv_ = b;
           },
           R"DOC(The type is BOOL, fuse_relu_depthwise_conv indicate whether
@@ -1544,14 +1553,17 @@ All parameter, weight, gradient are variables in Paddle.
           build_strategy = fluid.BuildStrategy()
           build_strategy.fuse_relu_depthwise_conv = True
         )DOC")
-      .def_property(
-          "fuse_broadcast_ops",
-          [](const BuildStrategy &self) { return self.fuse_broadcast_ops_; },
-          [](BuildStrategy &self, bool b) {
-            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
-            self.fuse_broadcast_ops_ = b;
-          },
-          R"DOC(The type is BOOL, fuse_broadcast_op indicates whether
+      .def_property("fuse_broadcast_ops",
+                    [](const BuildStrategy &self) {
+                      return self.fuse_broadcast_ops_ == true ||
+                             self.fuse_broadcast_ops_ == boost::none;
+                    },
+                    [](BuildStrategy &self, bool b) {
+                      PADDLE_ENFORCE_EQ(!self.IsFinalized(), true,
+                                        "BuildStrategy is finlaized.");
+                      self.fuse_broadcast_ops_ = b;
+                    },
+                    R"DOC(The type is BOOL, fuse_broadcast_op indicates whether
                       to fuse the broadcast ops. Note that, in Reduce mode,
                       fusing broadcast ops may make the program faster. Because
                       fusing broadcast OP equals delaying the execution of all
@@ -1559,18 +1571,20 @@ All parameter, weight, gradient are variables in Paddle.
                       for NCCLReduce operations for a period of time. Default False.)DOC")
       .def_property("fuse_all_optimizer_ops",
                     [](const BuildStrategy &self) {
-                      return self.fuse_all_optimizer_ops_;
+                      return self.fuse_all_optimizer_ops_ == true ||
+                             self.fuse_all_optimizer_ops_ == boost::none;
                     },
                     [](BuildStrategy &self, bool b) {
-                      PADDLE_ENFORCE(!self.IsFinalized(),
-                                     "BuildStrategy is finlaized.");
+                      PADDLE_ENFORCE_EQ(!self.IsFinalized(), true,
+                                        "BuildStrategy is finlaized.");
                       self.fuse_all_optimizer_ops_ = b;
                     })
       .def_property(
           "sync_batch_norm",
           [](const BuildStrategy &self) { return self.sync_batch_norm_; },
           [](BuildStrategy &self, bool b) {
-            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
+            PADDLE_ENFORCE_EQ(!self.IsFinalized(), true,
+                              "BuildStrategy is finlaized.");
             self.sync_batch_norm_ = b;
           },
           R"DOC(The type is BOOL, sync_batch_norm indicates whether to use
@@ -1637,7 +1651,10 @@ All parameter, weight, gradient are variables in Paddle.
           [](BuildStrategy &self, bool b) { self.enable_inplace_ = b; })
       .def_property(
           "fuse_all_reduce_ops",
-          [](const BuildStrategy &self) { return self.fuse_all_reduce_ops_; },
+          [](const BuildStrategy &self) {
+            return self.fuse_all_reduce_ops_ == true ||
+                   self.fuse_all_reduce_ops_ == boost::none;
+          },
           [](BuildStrategy &self, bool b) { self.fuse_all_reduce_ops_ = b; })
       .def_property("enable_backward_optimizer_op_deps",
                     [](const BuildStrategy &self) {