diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 2ce73df0248bee244665b033edddcea70407546d..46e56981ea57722bbc064304761e7ab7b7aee141 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -6,7 +6,7 @@ paddle.fluid.Program.create_block ArgSpec(args=['self', 'parent_idx'], varargs=N
 paddle.fluid.Program.current_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.Program.get_desc ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.Program.global_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.Program.inference_optimize ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Program.inference_optimize ArgSpec(args=['self', 'export_for_deployment'], varargs=None, keywords=None, defaults=(True,))
 paddle.fluid.Program.list_vars ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.Program.optimized_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
 paddle.fluid.Program.parse_from_string ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None)
@@ -18,6 +18,9 @@ paddle.fluid.Operator.all_attrs ArgSpec(args=['self'], varargs=None, keywords=No
 paddle.fluid.Operator.attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.Operator.attr_type ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.Operator.block_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Operator.block_attr_id ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Operator.blocks_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Operator.blocks_attr_ids ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.Operator.has_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.Operator.has_kernel ArgSpec(args=['self', 'op_type'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.Operator.input ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
@@ -74,7 +77,7 @@ paddle.fluid.io.save_persistables ArgSpec(args=['executor', 'dirname', 'main_pro
 paddle.fluid.io.load_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
 paddle.fluid.io.load_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.io.load_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.io.save_inference_model ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename'], varargs=None, keywords=None, defaults=(None, None, None))
+paddle.fluid.io.save_inference_model ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True))
 paddle.fluid.io.load_inference_model ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.io.get_inference_program ArgSpec(args=['target_vars', 'main_program'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False))
diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index af26cf287205833ebdfe1c3971b760adbf5c0a68..122dc161b41246e5f08bd0ae8b763489e9ee22f9 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -297,7 +297,20 @@ Attribute OpDesc::GetNullableAttr(const std::string &name) const {
   }
 }
 
-int OpDesc::GetBlockAttr(const std::string &name) const {
+std::vector<int> OpDesc::GetBlocksAttrIds(const std::string &name) const {
+  auto it = attrs_.find(name);
+  PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
+  auto blocks = boost::get<std::vector<BlockDesc *>>(it->second);
+
+  std::vector<int> ids;
+  for (auto n : blocks) {
+    ids.push_back(n->ID());
+  }
+
+  return ids;
+}
+
+int OpDesc::GetBlockAttrId(const std::string &name) const {
   auto it = attrs_.find(name);
   PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
   return boost::get<BlockDesc *>(it->second)->ID();
diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h
index 6805d25934b42a5752cbb54174d3017cf63e4b23..2422392e24d864dc3e7973ab35e038ecf2c0392a 100644
--- a/paddle/fluid/framework/op_desc.h
+++ b/paddle/fluid/framework/op_desc.h
@@ -85,7 +85,9 @@ class OpDesc {
 
   Attribute GetNullableAttr(const std::string &name) const;
 
-  int GetBlockAttr(const std::string &name) const;
+  int GetBlockAttrId(const std::string &name) const;
+
+  std::vector<int> GetBlocksAttrIds(const std::string &name) const;
 
   void Rename(const std::string &old_name, const std::string &new_name);
 
diff --git a/paddle/fluid/framework/program_desc.cc b/paddle/fluid/framework/program_desc.cc
index 1e01a6e900404990e16674755367d2fc6d832725..20bdc7830f32564448a69e9cd76c02585b7a1aca 100644
--- a/paddle/fluid/framework/program_desc.cc
+++ b/paddle/fluid/framework/program_desc.cc
@@ -58,7 +58,7 @@ ProgramDesc::ProgramDesc(const ProgramDesc &o) {
       for (const std::string &attr_name : op->AttrNames()) {
         if (op->GetAttrType(attr_name) == proto::AttrType::BLOCK) {
           int sub_block_id =
-              o.Block(block_id).Op(op_id)->GetBlockAttr(attr_name);
+              o.Block(block_id).Op(op_id)->GetBlockAttrId(attr_name);
           op->SetBlockAttr(attr_name, MutableBlock(sub_block_id));
         }
       }
diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc
index d0286719b9ea1aa671294f519051ac1e269c4e93..652a6ec7a4e2e823b28f39b449570cd375e88e18 100644
--- a/paddle/fluid/platform/profiler.cc
+++ b/paddle/fluid/platform/profiler.cc
@@ -270,12 +270,13 @@ struct EventItem {
   double min_time;
   double max_time;
   double ave_time;
+  float ratio;
 };
 
 // Print results
 void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
                    const std::string& sorted_domain, const size_t name_width,
-                   const size_t data_width) {
+                   const size_t data_width, double total) {
   // Output header information
   std::cout << "\n------------------------->"
             << "     Profiling Report     "
@@ -300,7 +301,8 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
   std::cout << std::setw(name_width) << "Event" << std::setw(data_width)
             << "Calls" << std::setw(data_width) << "Total"
             << std::setw(data_width) << "Min." << std::setw(data_width)
-            << "Max." << std::setw(data_width) << "Ave." << std::endl;
+            << "Max." << std::setw(data_width) << "Ave."
+            << std::setw(data_width) << "Ratio." << std::endl;
   for (size_t i = 0; i < events_table.size(); ++i) {
     for (size_t j = 0; j < events_table[i].size(); ++j) {
       const EventItem& event_item = events_table[i][j];
@@ -309,7 +311,9 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
                 << std::setw(data_width) << event_item.total_time
                 << std::setw(data_width) << event_item.min_time
                 << std::setw(data_width) << event_item.max_time
-                << std::setw(data_width) << event_item.ave_time << std::endl;
+                << std::setw(data_width) << event_item.ave_time
+                << std::setw(data_width) << event_item.total_time / total
+                << std::endl;
     }
   }
   std::cout << std::endl;
@@ -359,6 +363,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
   std::vector<std::vector<EventItem>> events_table;
   size_t max_name_width = 0;
+  double total = 0.;  // the total time
   for (size_t i = 0; i < events.size(); i++) {
     std::list<Event> pushed_events;
     std::vector<EventItem> event_items;
@@ -379,6 +384,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
                                    g_state == ProfilerState::kAll)
                                       ? rit->CudaElapsedMs(events[i][j])
                                       : rit->CpuElapsedMs(events[i][j]);
+          total += event_time;
 
           std::string event_name =
               "thread" + std::to_string(rit->thread_id()) + "::" + rit->name();
@@ -387,7 +393,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
           if (event_idx.find(event_name) == event_idx.end()) {
             event_idx[event_name] = event_items.size();
             EventItem event_item = {event_name, 1,          event_time,
-                                    event_time, event_time, event_time};
+                                    event_time, event_time, event_time,
+                                    0.};
             event_items.push_back(event_item);
           } else {
             int index = event_idx[event_name];
@@ -431,7 +438,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
   }
 
   // Print report
-  PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12);
+  PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12, total);
 }
 
 void DisableProfiler(EventSortingKey sorted_key,
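Note on the profiler change: every report row now ends with a Ratio column, computed as that row's total_time divided by the grand total accumulated in ParseEvents. A minimal sketch of surfacing the new column through the existing Python profiler context manager; the tiny network here is illustrative, not part of this patch:

    import numpy as np
    import paddle.fluid as fluid
    import paddle.fluid.profiler as profiler

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.fc(input=x, size=1)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    # On exiting the context, the C++ side prints the profiling report,
    # whose rows now carry Ratio. == total_time / total.
    with profiler.profiler('CPU', 'total'):
        for _ in range(10):
            exe.run(fluid.default_main_program(),
                    feed={'x': np.random.random((8, 13)).astype('float32')},
                    fetch_list=[y])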
diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc
index 7f06f7a9d72de909da1507d62054da4905084354..c2137ec6d7df24251432a4dfb8fffc3d3f77194e 100644
--- a/paddle/fluid/pybind/protobuf.cc
+++ b/paddle/fluid/pybind/protobuf.cc
@@ -296,7 +296,8 @@ void BindOpDesc(pybind11::module *m) {
              std::string ser(seriralized);
              self.SetAttr(name, ser);
            })
-      .def("block_attr", &pd::OpDesc::GetBlockAttr)
+      .def("block_attr_id", &pd::OpDesc::GetBlockAttrId)
+      .def("blocks_attr_ids", &pd::OpDesc::GetBlocksAttrIds)
      .def("check_attrs", &pd::OpDesc::CheckAttrs)
      .def("infer_shape", &pd::OpDesc::InferShape)
      .def("infer_var_type", &pd::OpDesc::InferVarType)
diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index aa62d344911cfdee82bf9b868ebed3e38eef80fb..3824b21ec298de6e42d91a128d9abd0cff69d0bc 100644
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -344,7 +344,7 @@ def _append_backward_ops_(block,
         grad_sub_block_list = []
         # If the op has its own sub-block, deal with the sub-block first
         if op.has_attr("sub_block"):
-            sub_block = program.block(op.block_attr("sub_block"))
+            sub_block = program.block(op.block_attr_id("sub_block"))
             grad_sub_block = program.create_block()
             grad_sub_block._set_forward_block_idx(sub_block.idx)
             cb = _callback_lookup_(op)
@@ -406,7 +406,7 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
     for op_idx in range(start_op_idx, block.desc.op_size()):
         op_desc = block.desc.op(op_idx)
         if op_desc.has_attr("sub_block"):
-            sub_block = block.program.block(op_desc.block_attr("sub_block"))
+            sub_block = block.program.block(op_desc.block_attr_id("sub_block"))
             _append_backward_vars_(sub_block, 0, grad_to_var, grad_info_map)
     new_vars = set()  # create new gradient variables
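Note: after the pybind rename, desc-level code reads BLOCK attributes through block_attr_id and BLOCKS attributes through blocks_attr_ids. A minimal desc-level sketch mirroring the updated assertion in test_protobuf_descs.py (the attribute name is just an example):

    from paddle.fluid import core

    program_desc = core.ProgramDesc()
    block = program_desc.block(0)
    op = block.append_op()
    op.set_type("test")

    # A BLOCK attribute is stored as a block index; the getter's name now says so.
    op.set_block_attr("block_attr", program_desc.block(0))
    assert op.block_attr_id("block_attr") == 0
    # For BLOCKS-typed attributes, blocks_attr_ids(name) returns the list of indices.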
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index fbeb0e5940ee1f87935a4e4815c907efdf68a1ae..d413f96e95d1613577f24b4b32a3aa0239b6cc1e 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -477,23 +477,25 @@ class Operator(object):
                  attrs=None):
         self.block = block
         self.desc = desc
+        # note: we do not keep a self.attrs dict here; see the discussion at
+        # https://github.com/PaddlePaddle/Paddle/pull/12583#pullrequestreview-145093173
-        self.attrs = attrs
-        if self.attrs is None:
-            self.attrs = dict()
+        op_attrs = attrs
+        if op_attrs is None:
+            op_attrs = dict()
         del attrs
 
         op_maker = core.op_proto_and_checker_maker
 
-        if op_maker.kOpRoleAttrName() not in self.attrs:
-            self.attrs[op_maker.kOpRoleAttrName()] = self.block.program.op_role
+        if op_maker.kOpRoleAttrName() not in op_attrs:
+            op_attrs[op_maker.kOpRoleAttrName()] = self.block.program.op_role
 
         role_var_name = op_maker.kOpRoleVarAttrName()
         if len(self.block.program.
-               op_role_var) != 0 and role_var_name not in self.attrs:
-            self.attrs[role_var_name] = self.block.program.op_role_var
+               op_role_var) != 0 and role_var_name not in op_attrs:
+            op_attrs[role_var_name] = self.block.program.op_role_var
 
-        if role_var_name in self.attrs and len(self.attrs[role_var_name]) == 0:
-            del self.attrs[role_var_name]
+        if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0:
+            del op_attrs[role_var_name]
 
         if len(self.desc.type()) != 0:
             return
@@ -563,15 +565,14 @@ class Operator(object):
                         arg.op = self
                 self.desc.set_output(out_proto.name, out_arg_names)
 
-        if self.attrs is not None:
-            if not isinstance(self.attrs, dict):
+        if op_attrs is not None:
+            if not isinstance(op_attrs, dict):
                 raise TypeError("'attrs' should be a dict.")
             for attr in proto.attrs:
                 attr_name = attr.name
-                if (attr_name not in self.attrs) or (
-                        self.attrs[attr_name] is None):
+                if (attr_name not in op_attrs) or (op_attrs[attr_name] is None):
                     continue
-                attr_val = self.attrs[attr_name]
+                attr_val = op_attrs[attr_name]
                 self._update_desc_attr(attr_name, attr_val)
 
         self.desc.check_attrs()
@@ -719,7 +720,6 @@ class Operator(object):
         Raises:
             ValueError: If the type of value doesn't match with desc.attr_type(name).
         """
-        self.attrs[name] = val
         self._update_desc_attr(name, val)
 
     def _update_desc_attr(self, name, val):
@@ -761,9 +761,9 @@ class Operator(object):
         """
         return self.desc.attr(name)
 
-    def block_attr(self, name):
+    def block_attr_id(self, name):
         """
-        Get the block attribute by name.
+        Get the block attribute's id by name.
 
         Args:
             name(str): the attribute name.
@@ -771,22 +771,74 @@ class Operator(object):
         Returns:
             int: the block index.
         """
-        return self.desc.block_attr(name)
+        return self.desc.block_attr_id(name)
+
+    def block_attr(self, name):
+        """
+        Get the block attribute by name.
+
+        Args:
+            name(str): the attribute name.
+
+        Returns:
+            block: the block attribute.
+        """
+
+        id = self.block_attr_id(name)
+        assert (id >= 0 and id < len(self.block.program.blocks))
+        return self.block.program.blocks[id]
+
+    def blocks_attr(self, name):
+        """
+        Get the blocks attribute by name.
+
+        Args:
+            name(str): the attribute name.
+
+        Returns:
+            list: list of the blocks attribute.
+        """
+        attrs = []
+        for i in self.blocks_attr_ids(name):
+            assert (i >= 0 and i < len(self.block.program.blocks))
+            attrs.append(self.block.program.blocks[i])
+
+        return attrs
+
+    def blocks_attr_ids(self, name):
+        """
+        Get the blocks attribute's ids by name.
+
+        Args:
+            name(str): the attribute name.
+
+        Returns:
+            list: list of the blocks ids.
+        """
+
+        return self.desc.blocks_attr_ids(name)
 
     def all_attrs(self):
         """
         Get the attribute dict.
 
         Returns:
-            dict: The Operator's attribute dict.
+            dict: The Operator's attribute dict, name->attr.
         """
         attr_names = self.attr_names
         attr_map = {}
         for n in attr_names:
-            if n == 'sub_block':
+            attr_type = self.desc.attr_type(n)
+            if attr_type == core.AttrType.BLOCK:
                 attr_map[n] = self.block_attr(n)
-            else:
-                attr_map[n] = self.attr(n)
+                continue
+
+            if attr_type == core.AttrType.BLOCKS:
+                attr_map[n] = self.blocks_attr(n)
+                continue
+
+            attr_map[n] = self.attr(n)
+
         return attr_map
@@ -1507,13 +1559,19 @@ class Program(object):
         The two code snippets above will generate same programs.
         """
         if for_test:
-            p = self.inference_optimize()
+            p = self.inference_optimize(export_for_deployment=False)
         else:
             p = Program()
+            p.current_block_idx = self.current_block_idx
+            p._seed = self._seed
             p.desc = core.ProgramDesc(self.desc)
             p.blocks = [
                 Block(p, i) for i in six.moves.range(self.desc.num_blocks())
             ]
+
+            p._current_role = self._current_role
+            p._op_role_var = self._op_role_var
+
             p._sync_with_cpp()
 
         p._copy_param_info_from(self)
@@ -1571,7 +1629,7 @@ class Program(object):
         res._sync_with_cpp()
         return res
 
-    def inference_optimize(self):
+    def inference_optimize(self, export_for_deployment=True):
         """
         This method will create a new program and make the following adjustments to it:
         1. Remove all reader variables and their creator ops if they exist.
@@ -1582,6 +1640,10 @@ class Program(object):
         attribute of operators to :code:`True`. All the :code:`Parameter`
         information will be lost.
 
+        Args:
+            export_for_deployment(bool): if True, remove the read ops added
+                by py_reader so the program can be used by the C++ inference library.
+
         Notes: This API is a very low level API. Use
         :code:`Program.clone(for_test=True)` instead.
@@ -1596,16 +1658,17 @@ class Program(object):
         # remove all readers and the read ops if they exist
         read_op_idx = 0
         root_block = res.desc.block(0)
-        while True:
-            if read_op_idx >= root_block.op_size() or root_block.op(
-                    read_op_idx).type() == 'read':
-                break
-            read_op_idx += 1
-        if read_op_idx < root_block.op_size():
-            root_block._remove_op(0, read_op_idx + 1)
-        for var in root_block.all_vars():
-            if var.type() == core.VarDesc.VarType.READER:
-                root_block._remove_var(var.name())
+        if export_for_deployment:
+            while True:
+                if read_op_idx >= root_block.op_size() or root_block.op(
+                        read_op_idx).type() == 'read':
+                    break
+                read_op_idx += 1
+            if read_op_idx < root_block.op_size():
+                root_block._remove_op(0, read_op_idx + 1)
+            for var in root_block.all_vars():
+                if var.type() == core.VarDesc.VarType.READER:
+                    root_block._remove_var(var.name())
 
         # change all `is_test` attributes to True
         for i in six.moves.range(res.desc.num_blocks()):
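Note: at the Python Operator level, block_attr()/blocks_attr() resolve the stored ids to actual Block objects, which is what lets all_attrs() return a name->attr dict the transpiler can feed straight back into append_op(). A hedged sketch of walking a program's attributes with the new helpers; the program itself is illustrative:

    import paddle.fluid as fluid
    from paddle.fluid import core

    prog = fluid.default_main_program()
    for op in prog.global_block().ops:
        for name in op.attr_names:
            attr_type = op.desc.attr_type(name)
            if attr_type == core.AttrType.BLOCK:
                # block_attr_id() gives the raw index, block_attr() the Block itself.
                print(name, op.block_attr_id(name), op.block_attr(name).idx)
            elif attr_type == core.AttrType.BLOCKS:
                print(name, op.blocks_attr_ids(name))
            else:
                print(name, op.attr(name))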
+ """ + + return self.desc.blocks_attr_ids(name) def all_attrs(self): """ Get the attribute dict. Returns: - dict: The Operator's attribute dict. + dict: The Operator's attribute dict, name->attr. """ attr_names = self.attr_names attr_map = {} for n in attr_names: - if n == 'sub_block': + attr_type = self.desc.attr_type(n) + if attr_type == core.AttrType.BLOCK: attr_map[n] = self.block_attr(n) - else: - attr_map[n] = self.attr(n) + continue + + if attr_type == core.AttrType.BLOCKS: + attr_map[n] = self.blocks_attr(n) + continue + + attr_map[n] = self.attr(n) + return attr_map @@ -1507,13 +1559,19 @@ class Program(object): The two code snippets above will generate same programs. """ if for_test: - p = self.inference_optimize() + p = self.inference_optimize(export_for_deployment=False) else: p = Program() + p.current_block_idx = self.current_block_idx + p._seed = self._seed p.desc = core.ProgramDesc(self.desc) p.blocks = [ Block(p, i) for i in six.moves.range(self.desc.num_blocks()) ] + + p._current_role = self._current_role + p._op_role_var = self._op_role_var + p._sync_with_cpp() p._copy_param_info_from(self) @@ -1571,7 +1629,7 @@ class Program(object): res._sync_with_cpp() return res - def inference_optimize(self): + def inference_optimize(self, export_for_deployment=True): """ This method will create a new program and do following adjustments on it: 1. Remove all reader variables and their creator ops if exist. @@ -1582,6 +1640,10 @@ class Program(object): attribute of operators to :code:`True`. All the :code:`Parameter` information will be lost. + Args: + export_for_deployment(bool): remove the read ops that are added by py_reader + for cpp inference library + Notes: This API is a very low level API. Use :code:`Program.clone(for_test=True)` instead. 
diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py
new file mode 100644
index 0000000000000000000000000000000000000000..8603d3a5b3b5d368fe87b8dcf9dc7363f95caf86
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import argparse
+import time
+import math
+
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.profiler as profiler
+from paddle.fluid import core
+import unittest
+from multiprocessing import Process
+import os
+import signal
+import collections
+
+SEED = 1
+DTYPE = "float32"
+paddle.dataset.mnist.fetch()
+
+
+# the random seed must be set before configuring the network.
+# fluid.default_startup_program().random_seed = SEED
+def cnn_model(data):
+    conv_pool_1 = fluid.nets.simple_img_conv_pool(
+        input=data,
+        filter_size=5,
+        num_filters=20,
+        pool_size=2,
+        pool_stride=2,
+        act="relu")
+    conv_pool_2 = fluid.nets.simple_img_conv_pool(
+        input=conv_pool_1,
+        filter_size=5,
+        num_filters=50,
+        pool_size=2,
+        pool_stride=2,
+        act="relu")
+
+    # TODO(dzhwinter): refine the initializer and random seed setting
+    SIZE = 10
+    input_shape = conv_pool_2.shape
+    param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
+    scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5
+
+    predict = fluid.layers.fc(
+        input=conv_pool_2,
+        size=SIZE,
+        act="softmax",
+        param_attr=fluid.param_attr.ParamAttr(
+            initializer=fluid.initializer.NormalInitializer(
+                loc=0.0, scale=scale)))
+    return predict
+
+
+def get_model(batch_size):
+    # Input data
+    images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+
+    # Train program
+    predict = cnn_model(images)
+    cost = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+    # Evaluator
+    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
+    batch_acc = fluid.layers.accuracy(
+        input=predict, label=label, total=batch_size_tensor)
+
+    inference_program = fluid.default_main_program().clone()
+    # Optimization
+    opt = fluid.optimizer.AdamOptimizer(
+        learning_rate=0.001, beta1=0.9, beta2=0.999)
+
+    # Reader
+    train_reader = paddle.batch(
+        paddle.dataset.mnist.train(), batch_size=batch_size)
+    test_reader = paddle.batch(
+        paddle.dataset.mnist.test(), batch_size=batch_size)
+    opt.minimize(avg_cost)
+    return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict
+
+
+def get_transpiler(trainer_id, main_program, pserver_endpoints, trainers):
+    t = fluid.DistributeTranspiler()
+    t.transpile(
+        trainer_id=trainer_id,
+        program=main_program,
+        pservers=pserver_endpoints,
+        trainers=trainers)
+    return t
+
+
+def operator_equal(a, b):
+    for k, v in a.__dict__.iteritems():
+        if isinstance(v, fluid.framework.Program) or \
+                isinstance(v, fluid.framework.Block):
+            continue
+
+        elif isinstance(v, core.OpDesc):
+            if v.serialize_to_string() != b.__dict__[k].serialize_to_string():
+                raise ValueError("In operator_equal not equal:{0}\n".format(k))
+
+        elif isinstance(v, collections.OrderedDict):
+            v0 = sorted(v.iteritems(), key=lambda x: x[0])
+            v1 = sorted(b.__dict__[k].iteritems(), key=lambda x: x[0])
+
+            if v0 != v1:
+                raise ValueError("In operator_equal not equal:{0}\n".format(k))
+
+        elif (v != b.__dict__[k]):
+            raise ValueError("In operator_equal not equal:{0}\n".format(k))
+
+    return True
+
+
+def block_equal(a, b):
+    for k, v in a.__dict__.iteritems():
+        if isinstance(v, core.ProgramDesc) or isinstance(
+                v, fluid.framework.Program) or isinstance(v, core.BlockDesc):
+            continue
+
+        elif k == "ops":
+            for i in range(0, len(a.ops)):
+                if not operator_equal(a.ops[i], b.ops[i]):
+                    raise ValueError("In block_equal not equal:{0}\n".format(k))
+            assert (len(a.ops) == len(b.ops))
+
+        elif isinstance(v, collections.OrderedDict):
+            v0 = sorted(v.iteritems(), key=lambda x: x[0])
+            v1 = sorted(b.__dict__[k].iteritems(), key=lambda x: x[0])
+
+            if v0 != v1:
+                raise ValueError("In block_equal not equal:{0}\n".format(k))
+
+        elif (v != b.__dict__[k]):
+            raise ValueError("In block_equal not equal:{0}\n".format(k))
+
+    return True
+
+
+def program_equal(a, b):
+    for k, v in a.__dict__.iteritems():
+        if isinstance(v, core.ProgramDesc):
+            continue
+
+        elif k == 'blocks':
+            for i in range(0, len(a.blocks)):
+                if not block_equal(a.blocks[i], b.blocks[i]):
+                    raise ValueError("In program_equal not equal:{0}\n".format(
+                        k))
+            assert (len(a.blocks) == len(b.blocks))
+
+        elif (v != b.__dict__[k]):
+            raise ValueError("In program_equal not equal:{0}\n".format(k))
+
+    return True
+
+
+class TestDistMnist(unittest.TestCase):
+    def test_desc_clone(self):
+        get_model(batch_size=20)
+
+        pserver_endpoints = "127.0.0.1:9123"
+        trainers = 1
+        current_endpoint = "127.0.0.1:9123"
+        t = get_transpiler(0,
+                           fluid.default_main_program(), pserver_endpoints,
+                           trainers)
+
+        pserver_prog = t.get_pserver_program(current_endpoint)
+        startup_prog = t.get_startup_program(current_endpoint, pserver_prog)
+        main = pserver_prog.clone()
+        startup = startup_prog.clone()
+
+        self.assertTrue(program_equal(main, pserver_prog))
+        self.assertTrue(program_equal(startup, startup_prog))
+
+
+if __name__ == "__main__":
+    unittest.main()
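Note: the equality helpers above raise on the first mismatching field instead of returning False, so a failure names the offending attribute. A short hypothetical round-trip check in the same vein; under this patch, clone() also copies current_block_idx, _seed, _current_role and _op_role_var, so a clone should compare equal field by field:

    import paddle.fluid as fluid

    prog = fluid.default_main_program()
    # program_equal is the helper defined in test_desc_clone.py above.
    assert program_equal(prog, prog.clone())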
diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py
index e059f2cd2ab994d580b3a494f6f5556938be1fa1..ab028dd36faf1fe0fc6fdaf3160bfd5b44d6ccdd 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_base.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_base.py
@@ -134,7 +134,7 @@ class TestDistBase(unittest.TestCase):
         self._ps_endpoints = "127.0.0.1:9123,127.0.0.1:9124"
         self._python_interp = "python"
 
-    def start_pserver(self, model_file):
+    def start_pserver(self, model_file, check_error_log):
         ps0_ep, ps1_ep = self._ps_endpoints.split(",")
         ps0_cmd = "%s %s pserver %s 0 %s %d TRUE" % \
             (self._python_interp, model_file, self._ps_endpoints, ps0_ep,
@@ -143,11 +143,23 @@ class TestDistBase(unittest.TestCase):
             (self._python_interp, model_file, self._ps_endpoints, ps1_ep,
              self._trainers)
 
+        ps0_pipe = subprocess.PIPE
+        ps1_pipe = subprocess.PIPE
+        if check_error_log:
+            print("ps0_cmd:", ps0_cmd)
+            print("ps1_cmd:", ps1_cmd)
+            ps0_pipe = open("/tmp/ps0_err.log", "wb")
+            ps1_pipe = open("/tmp/ps1_err.log", "wb")
+
         ps0_proc = subprocess.Popen(
-            ps0_cmd.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            ps0_cmd.split(" "), stdout=subprocess.PIPE, stderr=ps0_pipe)
         ps1_proc = subprocess.Popen(
-            ps1_cmd.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        return ps0_proc, ps1_proc
+            ps1_cmd.split(" "), stdout=subprocess.PIPE, stderr=ps1_pipe)
+
+        if not check_error_log:
+            return ps0_proc, ps1_proc, None, None
+        else:
+            return ps0_proc, ps1_proc, ps0_pipe, ps1_pipe
 
     def _wait_ps_ready(self, pid):
         retry_times = 50
@@ -164,7 +176,7 @@ class TestDistBase(unittest.TestCase):
                                 (e, retry_times))
                 retry_times -= 1
 
-    def check_with_place(self, model_file, delta=1e-3):
+    def check_with_place(self, model_file, delta=1e-3, check_error_log=False):
         # *ATTENTION* THIS TEST NEEDS AT LEAST 2GPUS TO RUN
         required_envs = {
             "PATH": os.getenv("PATH"),
@@ -173,17 +185,32 @@ class TestDistBase(unittest.TestCase):
             "FLAGS_fraction_of_gpu_memory_to_use": "0.15",
             "FLAGS_cudnn_deterministic": "1"
         }
+
+        if check_error_log:
+            required_envs["GLOG_v"] = "7"
+            required_envs["GLOG_logtostderr"] = "1"
+
         # Run local to get a base line
         env_local = {"CUDA_VISIBLE_DEVICES": "0"}
         env_local.update(required_envs)
         local_cmd = "%s %s trainer %s 0 %s %d FLASE" % \
             (self._python_interp, model_file, "127.0.0.1:1234", "127.0.0.1:1234", 1)
-        local_proc = subprocess.Popen(
-            local_cmd.split(" "),
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            env=env_local)
+        if not check_error_log:
+            local_proc = subprocess.Popen(
+                local_cmd.split(" "),
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                env=env_local)
+        else:
+            print("trainer cmd:", local_cmd)
+            err_log = open("/tmp/trainer.err.log", "wb")
+            local_proc = subprocess.Popen(
+                local_cmd.split(" "),
+                stdout=subprocess.PIPE,
+                stderr=err_log,
+                env=env_local)
+
         local_proc.wait()
         out, err = local_proc.communicate()
         local_ret = cpt.to_text(out)
@@ -191,7 +218,8 @@ class TestDistBase(unittest.TestCase):
         sys.stderr.write('local_stderr: %s\n' % err)
 
         # Run dist train to compare with local results
-        ps0, ps1 = self.start_pserver(model_file)
+        ps0, ps1, ps0_pipe, ps1_pipe = self.start_pserver(model_file,
+                                                          check_error_log)
         self._wait_ps_ready(ps0.pid)
         self._wait_ps_ready(ps1.pid)
 
@@ -209,15 +237,23 @@ class TestDistBase(unittest.TestCase):
         env1.update(required_envs)
 
         FNULL = open(os.devnull, 'w')
+
+        tr0_pipe = subprocess.PIPE
+        tr1_pipe = subprocess.PIPE
+        if check_error_log:
+            print("tr0_cmd:", tr0_cmd)
+            print("tr1_cmd:", tr1_cmd)
+            tr0_pipe = open("/tmp/tr0_err.log", "wb")
+            tr1_pipe = open("/tmp/tr1_err.log", "wb")
+
         tr0_proc = subprocess.Popen(
             tr0_cmd.split(" "),
             stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
+            stderr=tr0_pipe,
             env=env0)
         tr1_proc = subprocess.Popen(
             tr1_cmd.split(" "),
             stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
+            stderr=tr1_pipe,
            env=env1)
 
         tr0_proc.wait()
@@ -234,6 +270,13 @@ class TestDistBase(unittest.TestCase):
         local_first_loss = eval(local_lines[0])[0]
         local_last_loss = eval(local_lines[1])[0]
 
+        # close trainer files
+        if check_error_log:
+            tr0_pipe.close()
+            tr1_pipe.close()
+
+            ps0_pipe.close()
+            ps1_pipe.close()
+
         # FIXME: use terminate() instead of sigkill.
         os.kill(ps0.pid, signal.SIGKILL)
         os.kill(ps1.pid, signal.SIGKILL)
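Note: check_error_log routes each subprocess's stderr to a file under /tmp and switches the subprocess environment to verbose glog output, which is how a hanging or diverging distributed test can be diagnosed. A hypothetical concrete test case showing the call; the model file name is illustrative:

    class TestDistMnistWithLog(TestDistBase):
        def test_dist_train(self):
            # When enabled, writes /tmp/ps{0,1}_err.log, /tmp/tr{0,1}_err.log
            # and /tmp/trainer.err.log, with GLOG_v=7 in the subprocess env.
            self.check_with_place("dist_mnist.py", delta=1e-7,
                                  check_error_log=True)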
diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
index ac1719020bf6b6883619d613c8f34ae43c4cb224..3a91b34e6946118b963595a6222d23656e7af782 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
@@ -260,7 +260,7 @@ class TestLRDecayConditional(TranspilerTest):
         serv_op = pserver.blocks[0].ops[0]
         sub_blocks = []
         optimize_blocks = []
-        for b in serv_op.attrs["optimize_blocks"]:
+        for b in serv_op.all_attrs()["optimize_blocks"]:
             optimize_blocks.append(b.idx)
         for b in pserver.blocks:
             if b.idx not in optimize_blocks:
diff --git a/python/paddle/fluid/tests/unittests/test_program.py b/python/paddle/fluid/tests/unittests/test_program.py
index c51a48239330621d8e008415f81361616467cabf..0997afc97a97333c914a3027103ec48733b410dc 100644
--- a/python/paddle/fluid/tests/unittests/test_program.py
+++ b/python/paddle/fluid/tests/unittests/test_program.py
@@ -17,6 +17,7 @@ import unittest
 
 from paddle.fluid.framework import Program, default_main_program, program_guard, grad_var_name
 import paddle.fluid.layers as layers
+import paddle.fluid as fluid
 
 main_program = default_main_program()
 
@@ -98,6 +99,39 @@ class TestProgram(unittest.TestCase):
         new_program = main_program.clone()
         self.assertNotEqual(0, len(new_program.blocks[0].all_parameters()))
 
+    def test_program_inference_optimize(self):
+        def net():
+            reader = fluid.layers.py_reader(
+                capacity=10,
+                shapes=[[-1, 10], [-1, 1]],
+                lod_levels=[0, 0],
+                dtypes=['float32', 'int64'],
+                use_double_buffer=True)
+            in_data, label = fluid.layers.read_file(reader)
+            predict_label = fluid.layers.fc(in_data, size=2, act='softmax')
+            loss = fluid.layers.mean(
+                fluid.layers.cross_entropy(
+                    input=predict_label, label=label))
+
+            optimizer = fluid.optimizer.Adam()
+            optimizer.minimize(loss)
+
+        startup_program = fluid.Program()
+        main_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            net()
+        no_read_program = main_program.inference_optimize()
+        keep_read_program = main_program.inference_optimize(
+            export_for_deployment=False)
+        no_read_ops = no_read_program.global_block().ops
+        keep_read_ops = keep_read_program.global_block().ops
+        self.assertEqual(len(keep_read_ops) - len(no_read_ops), 2)
+        self.assertEqual(keep_read_ops[0].type, 'create_double_buffer_reader')
+        self.assertEqual(keep_read_ops[1].type, 'read')
+
+        for i in range(len(no_read_ops)):
+            self.assertEqual(no_read_ops[i].type, keep_read_ops[i + 2].type)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_protobuf_descs.py b/python/paddle/fluid/tests/unittests/test_protobuf_descs.py
index 37de79211413b757dccdd3ed68dfc3c38a02becb..f7087299cfb94a3d9413fa10ae8b192e2bf172d3 100644
--- a/python/paddle/fluid/tests/unittests/test_protobuf_descs.py
+++ b/python/paddle/fluid/tests/unittests/test_protobuf_descs.py
@@ -69,7 +69,7 @@ class TestOpDesc(unittest.TestCase):
         self.assertEqual(8, len(op.attr_names()))
 
         op.set_block_attr("block_attr", program_desc.block(0))
-        self.assertEqual(0, op.block_attr("block_attr"))
+        self.assertEqual(0, op.block_attr_id("block_attr"))
 
         mul_op = block.append_op()
         mul_op.set_type("mul")
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index 466e603bdcc0fb710f91e8de1f7325ab3bec9c61..6d94d5745a44fe320cddaab30bc6ed8073c4126e 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -586,12 +586,12 @@ class DistributeTranspiler(object):
             if op.type in [
                     "gaussian_random", "fill_constant", "uniform_random"
             ]:
-                op.attrs["shape"] = new_outputs["Out"].shape
+                op.set_attr("shape", list(new_outputs["Out"].shape))
             s_prog.global_block().append_op(
                 type=op.type,
                 inputs=new_inputs,
                 outputs=new_outputs,
-                attrs=op.attrs)
+                attrs=op.all_attrs())
         return s_prog
 
     # ====================== private transpiler functions =====================
@@ -605,7 +605,7 @@ class DistributeTranspiler(object):
         self.table_name = None
         for op in self.origin_program.global_block().ops:
             if op.type == LOOKUP_TABLE_TYPE:
-                if op.attrs['is_distributed'] is True:
+                if op.attr('is_distributed') is True:
                     if self.table_name is None:
                         self.table_name = op.input("W")[0]
                     if self.table_name != op.input("W")[0]:
@@ -1265,7 +1265,7 @@ class DistributeTranspiler(object):
             type=opt_op.type,
             inputs=new_inputs,
             outputs=outputs,
-            attrs=opt_op.attrs)
+            attrs=opt_op.all_attrs())
 
     def _is_splited_grad_var(self, var, var_dict):
         grad_block = None
@@ -1296,7 +1296,7 @@ class DistributeTranspiler(object):
                 block._clone_variable(var)
 
         return block.append_op(
-            type=op.type, inputs=inputs, outputs=outputs, attrs=op.attrs)
+            type=op.type, inputs=inputs, outputs=outputs, attrs=op.all_attrs())
 
     def _append_pserver_non_opt_ops(self, optimize_block, opt_op):
         program = optimize_block.program
@@ -1337,7 +1337,7 @@ class DistributeTranspiler(object):
             type=opt_op.type,
             inputs=inputs,
             outputs=outputs,
-            attrs=opt_op.attrs)
+            attrs=opt_op.all_attrs())
 
     def _is_op_connected(self, op1, op2):
         # If one op's input is another op's output or
@@ -1442,8 +1442,8 @@ class DistributeTranspiler(object):
         # optimize
         op_maker = core.op_proto_and_checker_maker
         optimize_role = core.op_proto_and_checker_maker.OpRole.Optimize
-        if op_maker.kOpRoleAttrName() in op.attrs and \
-                int(op.attrs[op_maker.kOpRoleAttrName()]) == int(optimize_role):
+        if op_maker.kOpRoleAttrName() in op.attr_names and \
+                int(op.all_attrs()[op_maker.kOpRoleAttrName()]) == int(optimize_role):
             return True
         return False
@@ -1466,8 +1466,8 @@ class DistributeTranspiler(object):
         # and op_role_var to get the pair.
         for input_name in op.input_arg_names:
             if input_name.find("@GRAD") != -1 and \
-                op.attrs[RPC_OP_ROLE_ATTR_NAME]:
-                param_name = op.attrs[OP_ROLE_VAR_ATTR_NAME][0]
+                op.attr(RPC_OP_ROLE_ATTR_NAME):
+                param_name = op.attr(OP_ROLE_VAR_ATTR_NAME)[0]
                 params_grads.append([
                     origin_var_dict[param_name],
                     origin_var_dict[input_name]
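Note: since Operator no longer keeps a Python-side attrs dict, every read of op.attrs becomes op.attr(name) or op.all_attrs(), and every write becomes op.set_attr(name, value), keeping the underlying OpDesc as the single source of truth. A migration sketch in the pattern used by the transpiler above; `op`, `block`, `inputs`, `outputs` and `new_outputs` stand in for the surrounding transpiler context:

    # Reading a single attribute (was: op.attrs['is_distributed']):
    is_dist = op.attr('is_distributed')

    # Writing an attribute updates the OpDesc directly
    # (was: op.attrs["shape"] = new_outputs["Out"].shape):
    op.set_attr("shape", list(new_outputs["Out"].shape))

    # Copying all attributes when re-appending an op on another block
    # (was: attrs=op.attrs):
    block.append_op(
        type=op.type, inputs=inputs, outputs=outputs, attrs=op.all_attrs())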