提交 77f12e00 编写于 作者: M minqiyang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into port_pybind11

......@@ -6,7 +6,7 @@ paddle.fluid.Program.create_block ArgSpec(args=['self', 'parent_idx'], varargs=N
paddle.fluid.Program.current_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.get_desc ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.global_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.inference_optimize ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.inference_optimize ArgSpec(args=['self', 'export_for_deployment'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.Program.list_vars ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.optimized_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.Program.parse_from_string ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None)
......@@ -18,6 +18,9 @@ paddle.fluid.Operator.all_attrs ArgSpec(args=['self'], varargs=None, keywords=No
paddle.fluid.Operator.attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.attr_type ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.block_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.block_attr_id ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.blocks_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.blocks_attr_ids ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.has_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.has_kernel ArgSpec(args=['self', 'op_type'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.input ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
......@@ -74,7 +77,7 @@ paddle.fluid.io.save_persistables ArgSpec(args=['executor', 'dirname', 'main_pro
paddle.fluid.io.load_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.io.load_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.load_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.save_inference_model ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.io.save_inference_model ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True))
paddle.fluid.io.load_inference_model ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.get_inference_program ArgSpec(args=['target_vars', 'main_program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False))
......
......@@ -297,7 +297,20 @@ Attribute OpDesc::GetNullableAttr(const std::string &name) const {
}
}
int OpDesc::GetBlockAttr(const std::string &name) const {
std::vector<int> OpDesc::GetBlocksAttrIds(const std::string &name) const {
auto it = attrs_.find(name);
PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
auto blocks = boost::get<std::vector<BlockDesc *>>(it->second);
std::vector<int> ids;
for (auto n : blocks) {
ids.push_back(n->ID());
}
return ids;
}
int OpDesc::GetBlockAttrId(const std::string &name) const {
auto it = attrs_.find(name);
PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
return boost::get<BlockDesc *>(it->second)->ID();
......
......@@ -85,7 +85,9 @@ class OpDesc {
Attribute GetNullableAttr(const std::string &name) const;
int GetBlockAttr(const std::string &name) const;
int GetBlockAttrId(const std::string &name) const;
std::vector<int> GetBlocksAttrIds(const std::string &name) const;
void Rename(const std::string &old_name, const std::string &new_name);
......
......@@ -58,7 +58,7 @@ ProgramDesc::ProgramDesc(const ProgramDesc &o) {
for (const std::string &attr_name : op->AttrNames()) {
if (op->GetAttrType(attr_name) == proto::AttrType::BLOCK) {
int sub_block_id =
o.Block(block_id).Op(op_id)->GetBlockAttr(attr_name);
o.Block(block_id).Op(op_id)->GetBlockAttrId(attr_name);
op->SetBlockAttr(attr_name, MutableBlock(sub_block_id));
}
}
......
......@@ -270,12 +270,13 @@ struct EventItem {
double min_time;
double max_time;
double ave_time;
float ratio;
};
// Print results
void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
const std::string& sorted_domain, const size_t name_width,
const size_t data_width) {
const size_t data_width, double total) {
// Output header information
std::cout << "\n------------------------->"
<< " Profiling Report "
......@@ -300,7 +301,8 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
std::cout << std::setw(name_width) << "Event" << std::setw(data_width)
<< "Calls" << std::setw(data_width) << "Total"
<< std::setw(data_width) << "Min." << std::setw(data_width)
<< "Max." << std::setw(data_width) << "Ave." << std::endl;
<< "Max." << std::setw(data_width) << "Ave."
<< std::setw(data_width) << "Ratio." << std::endl;
for (size_t i = 0; i < events_table.size(); ++i) {
for (size_t j = 0; j < events_table[i].size(); ++j) {
const EventItem& event_item = events_table[i][j];
......@@ -309,7 +311,9 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
<< std::setw(data_width) << event_item.total_time
<< std::setw(data_width) << event_item.min_time
<< std::setw(data_width) << event_item.max_time
<< std::setw(data_width) << event_item.ave_time << std::endl;
<< std::setw(data_width) << event_item.ave_time
<< std::setw(data_width) << event_item.total_time / total
<< std::endl;
}
}
std::cout << std::endl;
......@@ -359,6 +363,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
std::vector<std::vector<EventItem>> events_table;
size_t max_name_width = 0;
double total = 0.; // the total time
for (size_t i = 0; i < events.size(); i++) {
std::list<Event> pushed_events;
std::vector<EventItem> event_items;
......@@ -379,6 +384,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
g_state == ProfilerState::kAll)
? rit->CudaElapsedMs(events[i][j])
: rit->CpuElapsedMs(events[i][j]);
total += event_time;
std::string event_name =
"thread" + std::to_string(rit->thread_id()) + "::" + rit->name();
......@@ -387,7 +393,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
if (event_idx.find(event_name) == event_idx.end()) {
event_idx[event_name] = event_items.size();
EventItem event_item = {event_name, 1, event_time,
event_time, event_time, event_time};
event_time, event_time, event_time,
0.};
event_items.push_back(event_item);
} else {
int index = event_idx[event_name];
......@@ -431,7 +438,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
}
// Print report
PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12);
PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12, total);
}
void DisableProfiler(EventSortingKey sorted_key,
......
......@@ -296,7 +296,8 @@ void BindOpDesc(pybind11::module *m) {
std::string ser(seriralized);
self.SetAttr(name, ser);
})
.def("block_attr", &pd::OpDesc::GetBlockAttr)
.def("block_attr_id", &pd::OpDesc::GetBlockAttrId)
.def("blocks_attr_ids", &pd::OpDesc::GetBlocksAttrIds)
.def("check_attrs", &pd::OpDesc::CheckAttrs)
.def("infer_shape", &pd::OpDesc::InferShape)
.def("infer_var_type", &pd::OpDesc::InferVarType)
......
......@@ -344,7 +344,7 @@ def _append_backward_ops_(block,
grad_sub_block_list = []
# If the op has its own sub-block, deal with the sub-block first
if op.has_attr("sub_block"):
sub_block = program.block(op.block_attr("sub_block"))
sub_block = program.block(op.block_attr_id("sub_block"))
grad_sub_block = program.create_block()
grad_sub_block._set_forward_block_idx(sub_block.idx)
cb = _callback_lookup_(op)
......@@ -406,7 +406,7 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
for op_idx in range(start_op_idx, block.desc.op_size()):
op_desc = block.desc.op(op_idx)
if op_desc.has_attr("sub_block"):
sub_block = block.program.block(op_desc.block_attr("sub_block"))
sub_block = block.program.block(op_desc.block_attr_id("sub_block"))
_append_backward_vars_(sub_block, 0, grad_to_var, grad_info_map)
new_vars = set()
# create new gradient variables
......
......@@ -477,23 +477,25 @@ class Operator(object):
attrs=None):
self.block = block
self.desc = desc
self.attrs = attrs
if self.attrs is None:
self.attrs = dict()
# note: not add self.attrs here:
# https://github.com/PaddlePaddle/Paddle/pull/12583#pullrequestreview-145093173
op_attrs = attrs
if op_attrs is None:
op_attrs = dict()
del attrs
op_maker = core.op_proto_and_checker_maker
if op_maker.kOpRoleAttrName() not in self.attrs:
self.attrs[op_maker.kOpRoleAttrName()] = self.block.program.op_role
if op_maker.kOpRoleAttrName() not in op_attrs:
op_attrs[op_maker.kOpRoleAttrName()] = self.block.program.op_role
role_var_name = op_maker.kOpRoleVarAttrName()
if len(self.block.program.
op_role_var) != 0 and role_var_name not in self.attrs:
self.attrs[role_var_name] = self.block.program.op_role_var
op_role_var) != 0 and role_var_name not in op_attrs:
op_attrs[role_var_name] = self.block.program.op_role_var
if role_var_name in self.attrs and len(self.attrs[role_var_name]) == 0:
del self.attrs[role_var_name]
if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0:
del op_attrs[role_var_name]
if len(self.desc.type()) != 0:
return
......@@ -563,15 +565,14 @@ class Operator(object):
arg.op = self
self.desc.set_output(out_proto.name, out_arg_names)
if self.attrs is not None:
if not isinstance(self.attrs, dict):
if op_attrs is not None:
if not isinstance(op_attrs, dict):
raise TypeError("'attrs' should be a dict.")
for attr in proto.attrs:
attr_name = attr.name
if (attr_name not in self.attrs) or (
self.attrs[attr_name] is None):
if (attr_name not in op_attrs) or (op_attrs[attr_name] is None):
continue
attr_val = self.attrs[attr_name]
attr_val = op_attrs[attr_name]
self._update_desc_attr(attr_name, attr_val)
self.desc.check_attrs()
......@@ -719,7 +720,6 @@ class Operator(object):
Raises:
ValueError: If the type of value doesn't match with desc.attr_type(name).
"""
self.attrs[name] = val
self._update_desc_attr(name, val)
def _update_desc_attr(self, name, val):
......@@ -761,9 +761,9 @@ class Operator(object):
"""
return self.desc.attr(name)
def block_attr(self, name):
def block_attr_id(self, name):
"""
Get the block attribute by name.
Get the block attribute's id by name.
Args:
name(str): the attribute name.
......@@ -771,22 +771,74 @@ class Operator(object):
Returns:
int: the block index.
"""
return self.desc.block_attr(name)
return self.desc.block_attr_id(name)
def block_attr(self, name):
"""
Get the block attribute by name.
Args:
name(str): the attribute name.
Returns:
block: the block attribute.
"""
id = self.block_attr_id(name)
assert (id >= 0 and id < len(self.block.program.blocks))
return self.block.program.blocks[id]
def blocks_attr(self, name):
"""
Get the blocks attribute by name.
Args:
name(str): the attribute name.
Returns:
list: list of the blocks attribute.
"""
attrs = []
for i in self.blocks_attr_ids(name):
assert (i >= 0 and i < len(self.block.program.blocks))
attrs.append(self.block.program.blocks[i])
return attrs
def blocks_attr_ids(self, name):
"""
Get the blocks attribute's ids by name.
Args:
name(str): the attribute name.
Returns:
list: list of the blocks ids.
"""
return self.desc.blocks_attr_ids(name)
def all_attrs(self):
"""
Get the attribute dict.
Returns:
dict: The Operator's attribute dict.
dict: The Operator's attribute dict, name->attr.
"""
attr_names = self.attr_names
attr_map = {}
for n in attr_names:
if n == 'sub_block':
attr_type = self.desc.attr_type(n)
if attr_type == core.AttrType.BLOCK:
attr_map[n] = self.block_attr(n)
else:
attr_map[n] = self.attr(n)
continue
if attr_type == core.AttrType.BLOCKS:
attr_map[n] = self.blocks_attr(n)
continue
attr_map[n] = self.attr(n)
return attr_map
......@@ -1507,13 +1559,19 @@ class Program(object):
The two code snippets above will generate same programs.
"""
if for_test:
p = self.inference_optimize()
p = self.inference_optimize(export_for_deployment=False)
else:
p = Program()
p.current_block_idx = self.current_block_idx
p._seed = self._seed
p.desc = core.ProgramDesc(self.desc)
p.blocks = [
Block(p, i) for i in six.moves.range(self.desc.num_blocks())
]
p._current_role = self._current_role
p._op_role_var = self._op_role_var
p._sync_with_cpp()
p._copy_param_info_from(self)
......@@ -1571,7 +1629,7 @@ class Program(object):
res._sync_with_cpp()
return res
def inference_optimize(self):
def inference_optimize(self, export_for_deployment=True):
"""
This method will create a new program and do following adjustments on it:
1. Remove all reader variables and their creator ops if exist.
......@@ -1582,6 +1640,10 @@ class Program(object):
attribute of operators to :code:`True`. All the :code:`Parameter`
information will be lost.
Args:
export_for_deployment(bool): remove the read ops that are added by py_reader
for cpp inference library
Notes: This API is a very low level API. Use
:code:`Program.clone(for_test=True)` instead.
......@@ -1596,16 +1658,17 @@ class Program(object):
# remove all readers and the read_op if exist
read_op_idx = 0
root_block = res.desc.block(0)
while True:
if read_op_idx >= root_block.op_size() or root_block.op(
read_op_idx).type() == 'read':
break
read_op_idx += 1
if read_op_idx < root_block.op_size():
root_block._remove_op(0, read_op_idx + 1)
for var in root_block.all_vars():
if var.type() == core.VarDesc.VarType.READER:
root_block._remove_var(var.name())
if export_for_deployment:
while True:
if read_op_idx >= root_block.op_size() or root_block.op(
read_op_idx).type() == 'read':
break
read_op_idx += 1
if read_op_idx < root_block.op_size():
root_block._remove_op(0, read_op_idx + 1)
for var in root_block.all_vars():
if var.type() == core.VarDesc.VarType.READER:
root_block._remove_var(var.name())
# change all `is_test` attributes to True
for i in six.moves.range(res.desc.num_blocks()):
......
......@@ -264,7 +264,8 @@ class NormalInitializer(Initializer):
"dtype": int(var.dtype),
"mean": self._mean,
"std": self._std_dev,
"seed": self._seed
"seed": self._seed,
"use_mkldnn": False
})
var.op = op
return op
......
......@@ -555,7 +555,8 @@ def save_inference_model(dirname,
executor,
main_program=None,
model_filename=None,
params_filename=None):
params_filename=None,
export_for_deployment=True):
"""
Prune the given `main_program` to build a new program especially for inference,
and then save it and all related parameters to given `dirname` by the `executor`.
......@@ -577,6 +578,8 @@ def save_inference_model(dirname,
params_filename(str|None): The name of file to save all related parameters.
If it is setted None, parameters will be saved
in separate files .
export_for_deployment(bool): remove the read ops that are added by py_reader
for cpp inference lib. Default True
Returns:
None
......@@ -633,7 +636,8 @@ def save_inference_model(dirname,
copy_program.desc.flush()
pruned_program = copy_program.prune(targets=target_vars)
inference_program = pruned_program.inference_optimize()
inference_program = pruned_program.inference_optimize(
export_for_deployment=export_for_deployment)
fetch_var_names = [v.name for v in target_vars]
prepend_feed_ops(inference_program, feeded_var_names)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import argparse
import time
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from paddle.fluid import core
import unittest
from multiprocessing import Process
import os
import signal
import collections
SEED = 1
DTYPE = "float32"
paddle.dataset.mnist.fetch()
# random seed must set before configuring the network.
# fluid.default_startup_program().random_seed = SEED
def cnn_model(data):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=data,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu")
# TODO(dzhwinter) : refine the initializer and random seed settting
SIZE = 10
input_shape = conv_pool_2.shape
param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5
predict = fluid.layers.fc(
input=conv_pool_2,
size=SIZE,
act="softmax",
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale)))
return predict
def get_model(batch_size):
# Input data
images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# Train program
predict = cnn_model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
# Evaluator
batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
batch_acc = fluid.layers.accuracy(
input=predict, label=label, total=batch_size_tensor)
inference_program = fluid.default_main_program().clone()
# Optimization
opt = fluid.optimizer.AdamOptimizer(
learning_rate=0.001, beta1=0.9, beta2=0.999)
# Reader
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=batch_size)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=batch_size)
opt.minimize(avg_cost)
return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict
def get_transpiler(trainer_id, main_program, pserver_endpoints, trainers):
t = fluid.DistributeTranspiler()
t.transpile(
trainer_id=trainer_id,
program=main_program,
pservers=pserver_endpoints,
trainers=trainers)
return t
def operator_equal(a, b):
for k, v in a.__dict__.iteritems():
if isinstance(v, fluid.framework.Program) or \
isinstance(v, fluid.framework.Block):
continue
elif isinstance(v, core.OpDesc):
if v.serialize_to_string() != b.__dict__[k].serialize_to_string():
raise ValueError("In operator_equal not equal:{0}\n".format(k))
elif isinstance(v, collections.OrderedDict):
v0 = sorted(v.iteritems(), key=lambda x: x[0])
v1 = sorted(b.__dict__[k].iteritems(), key=lambda x: x[0])
if v0 != v1:
raise ValueError("In operator_equal not equal:{0}\n".format(k))
elif (v != b.__dict__[k]):
raise ValueError("In operator_equal not equal:{0}\n".format(k))
return True
def block_equal(a, b):
for k, v in a.__dict__.iteritems():
if isinstance(v, core.ProgramDesc) or isinstance(
v, fluid.framework.Program) or isinstance(v, core.BlockDesc):
continue
elif k == "ops":
for i in range(0, len(a.ops)):
if not operator_equal(a.ops[i], b.ops[i]):
raise ValueError("In block_equal not equal:{0}\n".format(k))
assert (len(a.ops) == len(b.ops))
elif isinstance(v, collections.OrderedDict):
v0 = sorted(v.iteritems(), key=lambda x: x[0])
v1 = sorted(b.__dict__[k].iteritems(), key=lambda x: x[0])
if v0 != v1:
raise ValueError("In block_equal not equal:{0}\n".format(k))
elif (v != b.__dict__[k]):
raise ValueError("In block_equal not equal:{0}\n".format(k))
return True
def program_equal(a, b):
for k, v in a.__dict__.iteritems():
if isinstance(v, core.ProgramDesc):
continue
elif k == 'blocks':
for i in range(0, len(a.blocks)):
if not block_equal(a.blocks[i], b.blocks[i]):
raise ValueError("In operator_equal not equal:{0}\n".format(
k))
return False
assert (len(a.blocks) == len(b.blocks))
elif (v != b.__dict__[k]):
raise ValueError("In program_equal not equal:{0}\n".format(k))
return True
class TestDistMnist(unittest.TestCase):
def test_desc_clone(self):
get_model(batch_size=20)
pserver_endpoints = "127.0.0.1:9123"
trainers = 1
current_endpoint = "127.0.0.1:9123"
t = get_transpiler(0,
fluid.default_main_program(), pserver_endpoints,
trainers)
pserver_prog = t.get_pserver_program(current_endpoint)
startup_prog = t.get_startup_program(current_endpoint, pserver_prog)
main = pserver_prog.clone()
startup = startup_prog.clone()
self.assertTrue(program_equal(main, pserver_prog))
self.assertTrue(program_equal(startup, startup_prog))
if __name__ == "__main__":
unittest.main()
......@@ -134,7 +134,7 @@ class TestDistBase(unittest.TestCase):
self._ps_endpoints = "127.0.0.1:9123,127.0.0.1:9124"
self._python_interp = "python"
def start_pserver(self, model_file):
def start_pserver(self, model_file, check_error_log):
ps0_ep, ps1_ep = self._ps_endpoints.split(",")
ps0_cmd = "%s %s pserver %s 0 %s %d TRUE" % \
(self._python_interp, model_file, self._ps_endpoints, ps0_ep,
......@@ -143,11 +143,23 @@ class TestDistBase(unittest.TestCase):
(self._python_interp, model_file, self._ps_endpoints, ps1_ep,
self._trainers)
ps0_pipe = subprocess.PIPE
ps1_pipe = subprocess.PIPE
if check_error_log:
print("ps0_cmd:", ps0_cmd)
print("ps1_cmd:", ps1_cmd)
ps0_pipe = open("/tmp/ps0_err.log", "wb")
ps1_pipe = open("/tmp/ps1_err.log", "wb")
ps0_proc = subprocess.Popen(
ps0_cmd.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
ps0_cmd.split(" "), stdout=subprocess.PIPE, stderr=ps0_pipe)
ps1_proc = subprocess.Popen(
ps1_cmd.split(" "), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return ps0_proc, ps1_proc
ps1_cmd.split(" "), stdout=subprocess.PIPE, stderr=ps1_pipe)
if not check_error_log:
return ps0_proc, ps1_proc, None, None
else:
return ps0_proc, ps1_proc, ps0_pipe, ps1_pipe
def _wait_ps_ready(self, pid):
retry_times = 50
......@@ -164,7 +176,7 @@ class TestDistBase(unittest.TestCase):
(e, retry_times))
retry_times -= 1
def check_with_place(self, model_file, delta=1e-3):
def check_with_place(self, model_file, delta=1e-3, check_error_log=False):
# *ATTENTION* THIS TEST NEEDS AT LEAST 2GPUS TO RUN
required_envs = {
"PATH": os.getenv("PATH"),
......@@ -173,17 +185,32 @@ class TestDistBase(unittest.TestCase):
"FLAGS_fraction_of_gpu_memory_to_use": "0.15",
"FLAGS_cudnn_deterministic": "1"
}
if check_error_log:
required_envs["GLOG_v"] = "7"
required_envs["GLOG_logtostderr"] = "1"
# Run local to get a base line
env_local = {"CUDA_VISIBLE_DEVICES": "0"}
env_local.update(required_envs)
local_cmd = "%s %s trainer %s 0 %s %d FLASE" % \
(self._python_interp, model_file,
"127.0.0.1:1234", "127.0.0.1:1234", 1)
local_proc = subprocess.Popen(
local_cmd.split(" "),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=env_local)
if not check_error_log:
local_proc = subprocess.Popen(
local_cmd.split(" "),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=env_local)
else:
print("trainer cmd:", local_cmd)
err_log = open("/tmp/trainer.err.log", "wb")
local_proc = subprocess.Popen(
local_cmd.split(" "),
stdout=subprocess.PIPE,
stderr=err_log,
env=env_local)
local_proc.wait()
out, err = local_proc.communicate()
local_ret = cpt.to_text(out)
......@@ -191,7 +218,8 @@ class TestDistBase(unittest.TestCase):
sys.stderr.write('local_stderr: %s\n' % err)
# Run dist train to compare with local results
ps0, ps1 = self.start_pserver(model_file)
ps0, ps1, ps0_pipe, ps1_pipe = self.start_pserver(model_file,
check_error_log)
self._wait_ps_ready(ps0.pid)
self._wait_ps_ready(ps1.pid)
......@@ -209,15 +237,23 @@ class TestDistBase(unittest.TestCase):
env1.update(required_envs)
FNULL = open(os.devnull, 'w')
tr0_pipe = subprocess.PIPE
tr1_pipe = subprocess.PIPE
if check_error_log:
print("tr0_cmd:", tr0_cmd)
print("tr1_cmd:", tr1_cmd)
tr0_pipe = open("/tmp/tr0_err.log", "wb")
tr1_pipe = open("/tmp/tr1_err.log", "wb")
tr0_proc = subprocess.Popen(
tr0_cmd.split(" "),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stderr=tr0_pipe,
env=env0)
tr1_proc = subprocess.Popen(
tr1_cmd.split(" "),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stderr=tr1_pipe,
env=env1)
tr0_proc.wait()
......@@ -234,6 +270,13 @@ class TestDistBase(unittest.TestCase):
local_first_loss = eval(local_lines[0])[0]
local_last_loss = eval(local_lines[1])[0]
# close trainer file
if check_error_log:
tr0_pipe.close()
tr1_pipe.close()
ps0_pipe.close()
ps1_pipe.close()
# FIXME: use terminate() instead of sigkill.
os.kill(ps0.pid, signal.SIGKILL)
os.kill(ps1.pid, signal.SIGKILL)
......
......@@ -260,7 +260,7 @@ class TestLRDecayConditional(TranspilerTest):
serv_op = pserver.blocks[0].ops[0]
sub_blocks = []
optimize_blocks = []
for b in serv_op.attrs["optimize_blocks"]:
for b in serv_op.all_attrs()["optimize_blocks"]:
optimize_blocks.append(b.idx)
for b in pserver.blocks:
if b.idx not in optimize_blocks:
......
......@@ -17,6 +17,7 @@ import unittest
from paddle.fluid.framework import Program, default_main_program, program_guard, grad_var_name
import paddle.fluid.layers as layers
import paddle.fluid as fluid
main_program = default_main_program()
......@@ -98,6 +99,39 @@ class TestProgram(unittest.TestCase):
new_program = main_program.clone()
self.assertNotEqual(0, len(new_program.blocks[0].all_parameters()))
def test_program_inference_optimize(self):
def net():
reader = fluid.layers.py_reader(
capacity=10,
shapes=[[-1, 10], [-1, 1]],
lod_levels=[0, 0],
dtypes=['float32', 'int64'],
use_double_buffer=True)
in_data, label = fluid.layers.read_file(reader)
predict_label = fluid.layers.fc(in_data, size=2, act='softmax')
loss = fluid.layers.mean(
fluid.layers.cross_entropy(
input=predict_label, label=label))
optimizer = fluid.optimizer.Adam()
optimizer.minimize(loss)
startup_program = fluid.Program()
main_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
net()
no_read_program = main_program.inference_optimize()
keep_read_program = main_program.inference_optimize(
export_for_deployment=False)
no_read_ops = no_read_program.global_block().ops
keep_read_ops = keep_read_program.global_block().ops
self.assertEqual(len(keep_read_ops) - len(no_read_ops), 2)
self.assertEqual(keep_read_ops[0].type, 'create_double_buffer_reader')
self.assertEqual(keep_read_ops[1].type, 'read')
for i in range(len(no_read_ops)):
self.assertEqual(no_read_ops[i].type, keep_read_ops[i + 2].type)
if __name__ == '__main__':
unittest.main()
......@@ -69,7 +69,7 @@ class TestOpDesc(unittest.TestCase):
self.assertEqual(8, len(op.attr_names()))
op.set_block_attr("block_attr", program_desc.block(0))
self.assertEqual(0, op.block_attr("block_attr"))
self.assertEqual(0, op.block_attr_id("block_attr"))
mul_op = block.append_op()
mul_op.set_type("mul")
......
......@@ -586,12 +586,12 @@ class DistributeTranspiler(object):
if op.type in [
"gaussian_random", "fill_constant", "uniform_random"
]:
op.attrs["shape"] = new_outputs["Out"].shape
op.set_attr("shape", list(new_outputs["Out"].shape))
s_prog.global_block().append_op(
type=op.type,
inputs=new_inputs,
outputs=new_outputs,
attrs=op.attrs)
attrs=op.all_attrs())
return s_prog
# ====================== private transpiler functions =====================
......@@ -605,7 +605,7 @@ class DistributeTranspiler(object):
self.table_name = None
for op in self.origin_program.global_block().ops:
if op.type == LOOKUP_TABLE_TYPE:
if op.attrs['is_distributed'] is True:
if op.attr('is_distributed') is True:
if self.table_name is None:
self.table_name = op.input("W")[0]
if self.table_name != op.input("W")[0]:
......@@ -1265,7 +1265,7 @@ class DistributeTranspiler(object):
type=opt_op.type,
inputs=new_inputs,
outputs=outputs,
attrs=opt_op.attrs)
attrs=opt_op.all_attrs())
def _is_splited_grad_var(self, var, var_dict):
grad_block = None
......@@ -1296,7 +1296,7 @@ class DistributeTranspiler(object):
block._clone_variable(var)
return block.append_op(
type=op.type, inputs=inputs, outputs=outputs, attrs=op.attrs)
type=op.type, inputs=inputs, outputs=outputs, attrs=op.all_attrs())
def _append_pserver_non_opt_ops(self, optimize_block, opt_op):
program = optimize_block.program
......@@ -1337,7 +1337,7 @@ class DistributeTranspiler(object):
type=opt_op.type,
inputs=inputs,
outputs=outputs,
attrs=opt_op.attrs)
attrs=opt_op.all_attrs())
def _is_op_connected(self, op1, op2):
# If one op's input is another op's output or
......@@ -1442,8 +1442,8 @@ class DistributeTranspiler(object):
# optimize
op_maker = core.op_proto_and_checker_maker
optimize_role = core.op_proto_and_checker_maker.OpRole.Optimize
if op_maker.kOpRoleAttrName() in op.attrs and \
int(op.attrs[op_maker.kOpRoleAttrName()]) == int(optimize_role):
if op_maker.kOpRoleAttrName() in op.attr_names and \
int(op.all_attrs()[op_maker.kOpRoleAttrName()]) == int(optimize_role):
return True
return False
......@@ -1466,8 +1466,8 @@ class DistributeTranspiler(object):
# and op_role_var to get the pair.
for input_name in op.input_arg_names:
if input_name.find("@GRAD") != -1 and \
op.attrs[RPC_OP_ROLE_ATTR_NAME]:
param_name = op.attrs[OP_ROLE_VAR_ATTR_NAME][0]
op.attr(RPC_OP_ROLE_ATTR_NAME):
param_name = op.attr(OP_ROLE_VAR_ATTR_NAME)[0]
params_grads.append([
origin_var_dict[param_name],
origin_var_dict[input_name]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册