Commit ce909664 authored by: T tensor-tang

Merge remote-tracking branch 'ups/develop' into refine/seqpool/feed

......@@ -37,14 +37,18 @@ INCLUDE(GNUInstallDirs)
INCLUDE(ExternalProject)
SET(NGRAPH_PROJECT "extern_ngraph")
SET(NGRAPH_GIT_TAG "08851c2c45fcf9fa9c74871dd3dbc3fe38f37cc9")
SET(NGRAPH_GIT_TAG "20bd8bbc79ae3a81c57313846a2be7313e5d1dab")
SET(NGRAPH_SOURCES_DIR ${THIRD_PARTY_PATH}/ngraph)
SET(NGRAPH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/ngraph)
SET(NGRAPH_INC_DIR ${NGRAPH_INSTALL_DIR}/include)
SET(NGRAPH_LIB_DIR ${NGRAPH_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR})
SET(NGRAPH_SHARED_LIB_NAME libngraph.so)
SET(NGRAPH_CPU_LIB_NAME libcpu_backend.so)
SET(NGRAPH_TBB_LIB_NAME libtbb.so.2)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
SET(NGRAPH_TBB_LIB_NAME libtbb_debug.so.2)
else()
SET(NGRAPH_TBB_LIB_NAME libtbb.so.2)
endif()
SET(NGRAPH_GIT_REPO "https://github.com/NervanaSystems/ngraph.git")
SET(NGRAPH_SHARED_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_SHARED_LIB_NAME})
SET(NGRAPH_CPU_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_CPU_LIB_NAME})
......@@ -66,16 +70,7 @@ ExternalProject_Add(
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
CMAKE_ARGS -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR}
CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/lib
)
# Workaround for nGraph expecting mklml to be in mkldnn install directory.
ExternalProject_Add_Step(
${NGRAPH_PROJECT}
PrepareMKL
COMMAND ${CMAKE_COMMAND} -E create_symlink ${MKLML_LIB} ${MKLDNN_INSTALL_DIR}/lib/libmklml_intel.so
COMMAND ${CMAKE_COMMAND} -E create_symlink ${MKLML_IOMP_LIB} ${MKLDNN_INSTALL_DIR}/lib/libiomp5.so
DEPENDEES download
DEPENDERS configure
CMAKE_ARGS -DMKLML_LIB_DIR=${MKLML_INSTALL_DIR}/lib
)
add_dependencies(ngraph ${NGRAPH_PROJECT})
......
......@@ -226,7 +226,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilderBase::ApplyImpl(
* Only variables should be the leaves of graph.
*/
AddOutputToLeafOps(&result);
result.Erase<GraphOps>(kGraphOps);
result.Erase(kGraphOps);
return graph;
}
......
......@@ -109,7 +109,6 @@ class Graph {
attr_dels_[attr_name] = []() {};
}
template <typename AttrType>
void Erase(const std::string &attr_name) {
PADDLE_ENFORCE(attrs_.count(attr_name) != 0, "%s not set in the graph",
attr_name);
......
......@@ -116,6 +116,10 @@ D
--modeldir=$DATA_DIR/mobilenet/model \
--data=$DATA_DIR/mobilenet/data.txt \
--refer=$DATA_DIR/mobilenet/result.txt
if [ $? -ne 0 ]; then
echo "trt demo trt_mobilenet_demo runs fail."
exit 1
fi
fi
done
set +x
......@@ -38,8 +38,8 @@ void Main() {
std::unique_ptr<PaddlePredictor> predictor;
paddle::contrib::AnalysisConfig config;
config.EnableUseGpu(100, 0);
config.SetModel(FLAGS_modeldir + "/__params__",
FLAGS_modeldir + "/__model__");
config.SetModel(FLAGS_modeldir + "/__model__",
FLAGS_modeldir + "/__params__");
config.EnableTensorRtEngine();
predictor = CreatePaddlePredictor(config);
......
......@@ -283,7 +283,7 @@ TEST(Analyzer_rnn1, multi_thread) {
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
input_slots_all, &outputs, 4 /* multi_thread */);
input_slots_all, &outputs, 2 /* multi_thread */);
}
// Validate that the AnalysisPredictor + ZeroCopyTensor really works by testing
......
......@@ -195,6 +195,10 @@ struct SelectedRowsAddToTensor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& context,
const framework::SelectedRows& input1,
framework::Tensor* input2) {
if (UNLIKELY(input1.rows().size() == 0)) {
LOG(WARNING) << "input selected rows is empty!";
return;
}
auto in1_height = input1.height();
auto in2_dims = input2->dims();
PADDLE_ENFORCE_EQ(in1_height, in2_dims[0]);
......
......@@ -41,7 +41,9 @@ class SumOp : public framework::OperatorWithKernel {
return; // skip runtime infershape when is tensor array;
}
auto x_var_types = ctx->GetInputsVarType("X");
auto x_dims = ctx->GetInputsDim("X");
size_t N = x_dims.size();
PADDLE_ENFORCE_GT(N, 0, "Input tensors count should > 0.");
if (N == 1) {
......@@ -49,7 +51,13 @@ class SumOp : public framework::OperatorWithKernel {
}
framework::DDim in_dim({0});
for (auto& x_dim : x_dims) {
for (size_t i = 0; i < x_dims.size(); ++i) {
auto& x_dim = x_dims[i];
// x_dim.size() == 1 means the real dim of selected rows is [0]
if (x_var_types[i] == framework::proto::VarType::SELECTED_ROWS &&
x_dim.size() == 1) {
continue;
}
if (framework::product(x_dim) == 0) {
continue;
}
......
......@@ -3,7 +3,7 @@ set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune fe
if(WITH_PYTHON)
list(APPEND PYBIND_DEPS py_func_op)
endif()
set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc async_executor_py.cc imperative.cc)
set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc async_executor_py.cc imperative.cc ir.cc)
if(WITH_PYTHON)
if(WITH_AMD_GPU)
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/pybind/ir.h"
#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/var_desc.h"
#include "pybind11/stl.h"
namespace py = pybind11;
using paddle::framework::ir::Graph;
using paddle::framework::ir::Node;
using paddle::framework::OpDesc;
using paddle::framework::ProgramDesc;
using paddle::framework::VarDesc;
using pybind11::return_value_policy;
namespace paddle {
namespace pybind {
void BindGraph(py::module *m) {
py::class_<Graph, std::shared_ptr<Graph>>(
*m, "Graph",
"The graph is a Directed Acyclic Single Static Assignment Graph, see "
"`paddle::ir::Graph` for details.")
.def(py::init<const ProgramDesc &>())
.def("has", &Graph::Has)
.def("get_int", &Graph::Get<int>)
.def("get_float", &Graph::Get<float>)
.def("get_double", &Graph::Get<double>)
.def("get_string", &Graph::Get<std::string>)
.def("set", [](Graph &self, const std::string &attr_name,
int attr) { return self.Set(attr_name, new int(attr)); })
.def("set",
[](Graph &self, const std::string &attr_name,
const std::string &attr) {
return self.Set(attr_name, new std::string(attr));
})
.def("set",
[](Graph &self, const std::string &attr_name, float attr) {
return self.Set(attr_name, new float(attr));
})
.def("set",
[](Graph &self, const std::string &attr_name, double attr) {
return self.Set(attr_name, new double(attr));
})
.def("erase", &Graph::Erase)
.def("nodes", &Graph::Nodes, return_value_policy::reference)
.def("create_var_node",
[](Graph &self, VarDesc &var_desc) {
return self.CreateVarNode(&var_desc);
},
return_value_policy::reference)
.def("create_op_node",
[](Graph &self, OpDesc &op_desc) {
return self.CreateOpNode(&op_desc);
},
return_value_policy::reference)
.def("create_control_dep_var", &Graph::CreateControlDepVar,
return_value_policy::reference)
.def("create_empty_node", &Graph::CreateEmptyNode,
return_value_policy::reference)
.def("release_nodes", &Graph::ReleaseNodes)
.def("remove_node",
[](Graph &self, Node &node) { return self.RemoveNode(&node); })
.def("retrieve_node", &Graph::RetrieveNode,
return_value_policy::reference)
.def("resolve_hazard", &Graph::ResolveHazard);
}
void BindNode(py::module *m) {
py::class_<Node> node(*m, "Node");
node.def("name", &Node::Name)
.def("node_type", &Node::NodeType)
.def("var", &Node::Var)
.def("op", &Node::Op)
.def("id", &Node::id)
.def("is_op", &Node::IsOp)
.def("is_var", &Node::IsVar)
.def("is_ctrl_var", &Node::IsCtrlVar)
.def_readwrite("inputs", &Node::inputs)
.def_readwrite("outputs", &Node::outputs);
py::enum_<Node::Type>(node, "Type")
.value("Operation", Node::Type::kOperation)
.value("Variable", Node::Type::kVariable)
.export_values();
}
} // namespace pybind
} // namespace paddle
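For reference, a minimal Python sketch of how the Graph and Node bindings added above can be driven from fluid.core (it mirrors the new test_ir_graph.py further down; the program and variable names are illustrative only):

import six
from paddle import fluid

# build a one-variable ProgramDesc and wrap it in an IR graph
prog = fluid.core.ProgramDesc()
block = prog.block(0)
x1 = block.var(six.b("x1"))
x1.set_type(fluid.core.VarDesc.VarType.LOD_TENSOR)

graph = fluid.core.Graph(prog)
graph.set("depth", 1)                 # stored through Graph::Set<int>
assert graph.has("depth")
assert graph.get_int("depth") == 1
node = graph.create_var_node(x1)      # returns a reference to the new ir::Node
assert node.is_var()
graph.erase("depth")                  # removes the attribute again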
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/pybind11.h>
#include "paddle/fluid/framework/ir/graph.h"
namespace paddle {
namespace pybind {
void BindGraph(pybind11::module *m);
void BindNode(pybind11::module *m);
} // namespace pybind
} // namespace paddle
......@@ -49,6 +49,7 @@ limitations under the License. */
#include "paddle/fluid/pybind/const_value.h"
#include "paddle/fluid/pybind/exception.h"
#include "paddle/fluid/pybind/imperative.h"
#include "paddle/fluid/pybind/ir.h"
#include "paddle/fluid/pybind/protobuf.h"
#include "paddle/fluid/pybind/pybind.h" // NOLINT
#include "paddle/fluid/pybind/recordio.h"
......@@ -775,7 +776,12 @@ All parameter, weight, gradient are variables in Paddle.
})
.def("set_int", [](ir::Pass &self, const std::string &name,
int val) { self.Set<const int>(name, new int(val)); })
.def("type", &ir::Pass::Type);
.def("type", &ir::Pass::Type)
.def("apply", [](ir::Pass &self, std::shared_ptr<ir::Graph> graph) {
std::unique_ptr<ir::Graph> origin_graph(graph.get());
auto optim_graph = self.Apply(std::move(origin_graph));
graph.reset(optim_graph.release());
});
py::class_<ir::PassBuilder, std::shared_ptr<ir::PassBuilder>> pb(
m, "PassBuilder");
......@@ -1042,6 +1048,9 @@ All parameter, weight, gradient are variables in Paddle.
BindRecordIOWriter(&m);
BindAsyncExecutor(&m);
BindGraph(&m);
BindNode(&m);
}
} // namespace pybind
} // namespace paddle
......@@ -71,10 +71,25 @@ class DataToLoDTensorConverter(object):
for each_data in data:
self._feed_impl_(each_data, lod[1:], lod_level - 1)
def _check_shape(self, shape):
for s1, s2 in zip(self.shape, shape):
if s1 != s2 and s1 >= 0 and s2 >= 0:
raise ValueError(
"Shape not match. What is defined in data layer is {}, but receive {}".
format(self.shape, shape))
def done(self):
arr = numpy.array(self.data, dtype=self.dtype)
if self.shape and len(arr.shape) != len(self.shape):
arr = arr.reshape(self.shape)
if self.shape:
if len(arr.shape) != len(self.shape):
try:
arr = arr.reshape(self.shape)
except ValueError:
raise ValueError(
"Reshape error. What is defined in data layer is {}, but receive {}"
.format(self.shape, arr.shape))
else:
self._check_shape(arr.shape)
t = core.LoDTensor()
t.set(arr, self.place)
if self.lod_level > 0:
......@@ -152,17 +167,8 @@ class DataFeeder(object):
raise TypeError("Feed list should contain a list of variable")
self.feed_dtypes.append(each_var.dtype)
self.feed_names.append(each_var.name)
shape = each_var.shape
batch_size_dim = -1
for i, s in enumerate(shape):
if s < 0:
batch_size_dim = i
break
if batch_size_dim == -1:
raise ValueError("Variable {0} must has a batch size dimension",
each_var.name)
self.feed_lod_level.append(each_var.lod_level)
self.feed_shapes.append(shape)
self.feed_shapes.append(each_var.shape)
self.place = place
......
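A short, illustrative sketch of the feeder behaviour after this change (layer names are arbitrary): shapes from the data layer are now compared element-wise against the fed data, a mismatch raises a descriptive ValueError, and a batch-size (-1) dimension is no longer mandatory for feed variables:

from paddle import fluid

image = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder([image, label], fluid.CPUPlace())

# matching shapes convert cleanly
result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])

# 783 elements cannot satisfy the declared [-1, 784] shape
try:
    feeder.feed([([0] * 783, [9]), ([1] * 783, [1])])
except ValueError as e:
    print(e)  # "Shape not match. What is defined in data layer is ..., but receive ..."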
......@@ -30,6 +30,12 @@ class TestDataFeeder(unittest.TestCase):
self.assertEqual(result['image'].recursive_sequence_lengths(), [])
self.assertEqual(result['label'].recursive_sequence_lengths(), [])
try:
result = feeder.feed([([0] * 783, [9]), ([1] * 783, [1])])
self.assertTrue(False)
except ValueError:
self.assertTrue(True)
def test_lod_level_1_converter(self):
# lod_level = 1
# each sentence has a different number of words
......
......@@ -31,6 +31,7 @@ fluid.default_main_program().random_seed = 1
class TestDistCTR2x2(TestDistRunnerBase):
def get_model(self, batch_size=2):
dnn_input_dim, lr_input_dim = dist_ctr_reader.load_data_meta()
""" network definition """
dnn_data = fluid.layers.data(
......@@ -97,7 +98,14 @@ class TestDistCTR2x2(TestDistRunnerBase):
inference_program = paddle.fluid.default_main_program().clone()
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
regularization = None
use_l2_decay = bool(os.getenv('USE_L2_DECAY', 0))
if use_l2_decay:
regularization = fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-1)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001,
regularization=regularization)
sgd_optimizer.minimize(avg_cost)
dataset = dist_ctr_reader.Dataset()
......
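In isolation, the env-driven toggle added above behaves as in the sketch below (note that os.getenv returns a string, so any non-empty value, including "0", enables the regularizer; the new test in test_dist_ctr.py passes "1" via need_envs):

import os
from paddle import fluid

regularization = None
if bool(os.getenv('USE_L2_DECAY', 0)):   # unset -> 0 -> False; "1" -> True
    regularization = fluid.regularizer.L2DecayRegularizer(
        regularization_coeff=1e-1)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001,
                                    regularization=regularization)
# sgd_optimizer.minimize(avg_cost) would follow, as in get_model() above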
......@@ -235,7 +235,6 @@ class DistSeResneXt2x2(TestDistRunnerBase):
bd = [step * e for e in epochs]
base_lr = 0.1
lr = []
lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
optimizer = fluid.optimizer.Momentum(
......
......@@ -18,7 +18,6 @@ import unittest
from test_dist_base import TestDistBase
# FIXME(tangwei): sum op can not handle when inputs is empty.
class TestDistCTR2x2(TestDistBase):
def _setup_config(self):
self._sync_mode = True
......@@ -28,5 +27,19 @@ class TestDistCTR2x2(TestDistBase):
self.check_with_place("dist_ctr.py", delta=1e-7, check_error_log=False)
class TestDistCTRWithL2Decay2x2(TestDistBase):
def _setup_config(self):
self._sync_mode = True
self._enforce_place = "CPU"
def test_dist_ctr(self):
need_envs = {"USE_L2_DECAY": "1"}
self.check_with_place(
"dist_ctr.py",
delta=1e-7,
check_error_log=False,
need_envs=need_envs)
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import six
from paddle import fluid
class TestIRGraph(unittest.TestCase):
"""
TODO(fc500110): `resolve_hazard` api will be tested when it can be used.
"""
def test_nodes(self):
graph = build_graph()
self.assertTrue(
{node.name()
for node in graph.nodes()} == {"x1", "x2", "out", "sum"})
def test_has_set_get(self):
graph = build_graph()
for attr_name in ["int", "float", "string"]:
self.assertFalse(graph.has(attr_name))
graph.set("int", 1)
graph.set("float", 0.5)
graph.set("string", "string")
for attr_name in ["int", "float", "string"]:
self.assertTrue(graph.has(attr_name))
self.assertTrue(graph.get_int("int") == 1)
self.assertTrue(graph.get_float("float") == 0.5)
self.assertTrue(graph.get_string("string") == "string")
def test_erase(self):
graph = build_graph()
graph.set("test", 0)
self.assertTrue(graph.has("test"))
graph.erase("test")
self.assertFalse(graph.has("test"))
def test_create_var_node(self):
prog = fluid.core.ProgramDesc()
block = prog.block(0)
shape = [10, 20]
x1 = block.var(six.b("x1"))
x1.set_type(fluid.core.VarDesc.VarType.LOD_TENSOR)
x1.set_shape(shape)
graph = fluid.core.Graph(prog)
node = graph.create_var_node(x1)
self.assertTrue(node.node_type() == fluid.core.Node.Type.Variable)
def test_create_op_node(self):
prog = fluid.core.ProgramDesc()
block = prog.block(0)
sum_op_desc = block.append_op()
graph = fluid.core.Graph(prog)
node = graph.create_op_node(sum_op_desc)
self.assertTrue(node.node_type() == fluid.core.Node.Type.Operation)
def test_create_control_dep_var(self):
graph = build_graph()
name = "__control_var@{}".format(len(graph.nodes()))
node = graph.create_control_dep_var()
self.assertTrue(node.name() == name)
def test_create_empty_node(self):
prog = fluid.core.ProgramDesc()
graph = fluid.core.Graph(prog)
n1 = graph.create_empty_node('x', fluid.core.Node.Type.Operation)
self.assertTrue(n1.name() == 'x')
n2 = graph.create_empty_node('y', fluid.core.Node.Type.Variable)
self.assertTrue(n2.name() == 'y')
def test_release_nodes(self):
graph = build_graph()
nodes = graph.release_nodes()
self.assertTrue(len(graph.nodes()) == 0)
self.assertTrue({node.name()
for node in nodes} == {"x1", "x2", "out", "sum"})
def test_remove_node(self):
graph = build_graph()
nodes = graph.nodes()
for node in nodes:
if node.name() == "sum":
break
self.assertTrue({node.name()
for node in nodes} == {"x1", "x2", "out", "sum"})
nodes.remove(node)
self.assertTrue({node.name() for node in nodes} == {"x1", "x2", "out"})
def test_retrieve_node(self):
graph = build_graph()
nodes = []
for i in range(len(graph.nodes())):
nodes.append(graph.retrieve_node(i))
for node in nodes:
self.assertTrue(node in graph.nodes())
def resolve_hazard(self):
pass
def build_graph():
prog = fluid.core.ProgramDesc()
block = prog.block(0)
shape = [10, 20]
# prepare input/output
x1 = block.var(six.b("x1"))
x1.set_type(fluid.core.VarDesc.VarType.LOD_TENSOR)
x1.set_shape(shape)
x2 = block.var(six.b("x2"))
x2.set_type(fluid.core.VarDesc.VarType.LOD_TENSOR)
x2.set_shape(shape)
out = block.var(six.b("out"))
out.set_type(fluid.core.VarDesc.VarType.LOD_TENSOR)
sum_op_desc = block.append_op()
sum_op_desc.set_type("sum")
sum_op_desc.set_input("X", ["x1", "x2"])
sum_op_desc.set_output("Out", ["out"])
sum_op_desc.check_attrs()
sum_op_desc.infer_shape(block)
graph = fluid.core.Graph(prog)
return graph
if __name__ == "__main__":
unittest.main()
......@@ -752,12 +752,6 @@ class DistributeTranspiler(object):
elif op not in lr_ops:
self._append_pserver_non_opt_ops(block, op)
def __op_have_grad_input__(op):
for varname in op.input_arg_names:
if varname.find("@GRAD") >= 0:
return varname
return ""
def __clone_lr_op_sub_block__(op, program, lr_block):
if not op.has_attr('sub_block'):
return
......@@ -808,7 +802,7 @@ class DistributeTranspiler(object):
merged_var = None
for _, op in enumerate(self.optimize_ops):
# find the origin grad var before clipping/L2Decay,
# merged_var should be the input var name of L2Decaybuil
# merged_var should be the input var name of L2Decay
grad_varname_for_block = op.attr(OP_ROLE_VAR_ATTR_NAME)[1]
if op.attr(OP_ROLE_VAR_ATTR_NAME)[
0] == optimize_target_param_name:
......@@ -1684,7 +1678,16 @@ class DistributeTranspiler(object):
if self.config.enable_dc_asgd:
new_inputs[key] = dc
else:
new_inputs[key] = merged_var
# Note!! This is for l2decay on sparse gradient, because it will create a new tensor for
# decayed gradient but not inplace modify the origin one
origin_grad_name = opt_op.input(key)[0]
if core.kNewGradSuffix(
) in origin_grad_name and pserver_block.has_var(
origin_grad_name):
new_grad = pserver_block.var(origin_grad_name)
new_inputs[key] = new_grad
else:
new_inputs[key] = merged_var
elif key == "Param":
param_block = _get_param_block(opt_op)
if not param_block:
......