From 56a903d3ac55c51d450f003878f504a41be5bc0b Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Fri, 22 Jun 2018 12:31:46 +0800 Subject: [PATCH] use optimize block list instead of first optimize block --- paddle/fluid/framework/framework.proto | 1 + paddle/fluid/framework/op_desc.cc | 13 +++++++++++++ paddle/fluid/framework/op_desc.h | 2 ++ paddle/fluid/framework/type_defs.h | 3 ++- paddle/fluid/operators/listen_and_serv_op.cc | 17 +++++++---------- paddle/fluid/pybind/protobuf.cc | 1 + python/paddle/fluid/framework.py | 6 ++++++ .../fluid/transpiler/distribute_transpiler.py | 9 +++------ 8 files changed, 35 insertions(+), 17 deletions(-) diff --git a/paddle/fluid/framework/framework.proto b/paddle/fluid/framework/framework.proto index 68fcc104d..8f73b3d47 100644 --- a/paddle/fluid/framework/framework.proto +++ b/paddle/fluid/framework/framework.proto @@ -46,6 +46,7 @@ message OpDesc { repeated bool bools = 11; optional int32 block_idx = 12; optional int64 l = 13; + repeated int32 blocks_idx = 14; }; message Var { diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index f92769192..a190199f1 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -211,6 +211,12 @@ void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) { need_update_ = true; } +void OpDesc::SetBlocksAttr(const std::string &name, + std::vector blocks) { + this->attrs_[name] = blocks; + need_update_ = true; +} + void OpDesc::SetAttrMap( const std::unordered_map &attr_map) { attrs_ = attr_map; @@ -305,6 +311,13 @@ struct SetAttrDescVisitor : public boost::static_visitor { void operator()(const std::vector &v) const { VectorToRepeated(v, attr_->mutable_bools()); } + void operator()(const std::vector &v) const { + std::vector blocks_idx; + for (auto blk : v) { + blocks_idx.push_back(blk->ID()); + } + VectorToRepeated(blocks_idx, attr_->mutable_blocks_idx()); + } void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->ID()); } void operator()(int64_t v) const { attr_->set_l(v); } void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h index a02d3e269..74dd8ec00 100644 --- a/paddle/fluid/framework/op_desc.h +++ b/paddle/fluid/framework/op_desc.h @@ -77,6 +77,8 @@ class OpDesc { void SetBlockAttr(const std::string &name, BlockDesc *block); + void SetBlocksAttr(const std::string &name, std::vector blocks); + Attribute GetAttr(const std::string &name) const; Attribute GetNullableAttr(const std::string &name) const; diff --git a/paddle/fluid/framework/type_defs.h b/paddle/fluid/framework/type_defs.h index 4879209ec..e099e40f1 100644 --- a/paddle/fluid/framework/type_defs.h +++ b/paddle/fluid/framework/type_defs.h @@ -35,7 +35,8 @@ using VariableNameMap = std::map>; using Attribute = boost::variant, std::vector, std::vector, bool, - std::vector, BlockDesc*, int64_t>; + std::vector, BlockDesc*, int64_t, + std::vector>; using AttributeMap = std::unordered_map; diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index 0f2863cc5..3fc5ae6f2 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -101,14 +101,11 @@ void ListenAndServOp::RunSyncLoop( framework::Scope *recv_scope, const std::vector &prefetch_block_id_list) const { size_t num_blocks = program->Size(); - auto skip_sub_blks = Attr>("skip_sub_blks"); + auto optimize_blocks = + Attr>(kOptimizeBlocks); PADDLE_ENFORCE_GE(num_blocks, 2, "server program should have at least 2 blocks"); - std::vector optimize_block_id_list; - for (auto *block : optimize_blocks) { - optimize_block_id_list.push_back(block->ID()); - } auto optimize_prepared = executor->Prepare(*program, optimize_block_id_list); // Insert placeholder for block0 which holds current op itself. optimize_prepared.insert( @@ -136,10 +133,10 @@ void ListenAndServOp::RunSyncLoop( std::vector parallel_blkids; parallel_blkids.push_back(optimize_blocks[0]->ID()); double ts = GetTimestamp(); - for (size_t i = 1; i < optimize_block_id_list.size(); ++i) { + for (size_t i = 1; i < optimize_blocks.size(); ++i) { // skip the first optimize block because it is already in the // parallel_blkids. - int blkid = optimize_block_id_list[i]; + int blkid = optimize_blocks[i]->ID(); if (program->Block(blkid).Parent() != last_parent_blkid) { ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program, recv_scope); @@ -263,7 +260,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, Attr>(kOptimizeBlocks); PADDLE_ENFORCE(optimize_blocks.size() > 1, "optimize blocks should be 1 at least on the pserver side."); - auto *program = optimize_block[0]->Program(); + auto *program = optimize_blocks[0]->Program(); framework::Executor executor(dev_place); // prepare for prefetch @@ -340,8 +337,8 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker { "a map from grad name to it's optimize block id") .SetDefault({}); AddAttr("sync_mode", "if works at sync_mode or not").SetDefault(true); - AddAttr(kOptimizeBlocks, - "Optimize blocks to run on server side."); + AddAttr>( + kOptimizeBlocks, "Optimize blocks to run on server side."); AddAttr>(kPrefetchVarNameToBlockId, "prefetch blocks to run on server side.") .SetDefault({}); diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index bcf6d4dd3..2d44e1f63 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -293,6 +293,7 @@ void BindOpDesc(pybind11::module *m) { .def("set_attr", &pd::OpDesc::SetAttr) .def("attr", &pd::OpDesc::GetAttr) .def("set_block_attr", &pd::OpDesc::SetBlockAttr) + .def("set_blocks_attr", &pd::OpDesc::SetBlocksAttr) .def("set_serialized_attr", [](pd::OpDesc &self, const std::string &name, const pybind11::bytes &seriralized) { diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index db21b1f3c..184307266 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -561,6 +561,10 @@ class Operator(object): if isinstance(self.attrs[attr_name], Block): self.desc.set_block_attr(attr_name, self.attrs[attr_name].desc) + elif isinstance(self.attrs[attr_name], list) and \ + all(isinstance(v, Block) for v in self.attrs[attr_name]): + self.desc.set_blocks_attr( + attr_name, [v.desc for v in self.attrs[attr_name]]) elif isinstance(self.attrs[attr_name], core.BlockDesc) or \ isinstance(self.attrs[attr_name], core.ProgramDesc): self.desc.set_serialized_attr( @@ -715,6 +719,8 @@ class Operator(object): self.attrs[name] = val if isinstance(val, Block): self.desc.set_block_attr(name, val.desc) + elif isinstance(val, list) and all(isinstance(v, Block) for v in val): + self.desc.set_blocks_attr(name, [v.desc for v in val]) elif isinstance(val, core.BlockDesc) or \ isinstance(val, core.ProgramDesc): self.desc.set_serialized_attr(name, val.serialize_to_string()) diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 391dddcf3..676079144 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -396,7 +396,7 @@ class DistributeTranspiler(object): return varname return "" - def __clone_lr_op_sub_block__(op, program, new_block, skip_sub_blks): + def __clone_lr_op_sub_block__(op, program, new_block): if not op.has_attr('sub_block'): return @@ -406,7 +406,6 @@ class DistributeTranspiler(object): # we put the new sub block to new block to follow the block # hierarchy of the original blocks new_sub_block = program.create_block(new_block.idx) - skip_sub_blks.append(new_sub_block.idx) # clone vars for var in origin_block.vars: @@ -416,8 +415,7 @@ class DistributeTranspiler(object): for op in origin_block.ops: self._clone_lr_op(program, new_sub_block, op) # clone sub_block of op - __clone_lr_op_sub_block__(op, program, new_sub_block, - skip_sub_blks) + __clone_lr_op_sub_block__(op, program, new_sub_block) # reset the block of op op.set_attr('sub_block', new_sub_block) @@ -433,8 +431,7 @@ class DistributeTranspiler(object): for _, op in enumerate(lr_ops): self._append_pserver_non_opt_ops(lr_decay_block, op) # append sub blocks to pserver_program in lr_decay_op - __clone_lr_op_sub_block__(op, pserver_program, lr_decay_block, - skip_sub_blks) + __clone_lr_op_sub_block__(op, pserver_program, lr_decay_block) # append op to the current block grad_to_block_id = [] -- GitLab