提交 d12252e6 编写于 作者: X xuezhong

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_sample_logits_op

test=develop
...@@ -203,7 +203,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ...@@ -203,7 +203,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
ENDIF() ENDIF()
SET(PROTOBUF_REPO "https://github.com/google/protobuf.git") SET(PROTOBUF_REPO "https://github.com/google/protobuf.git")
SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546") SET(PROTOBUF_TAG "v3.6.1")
ExternalProject_Add( ExternalProject_Add(
${TARGET_NAME} ${TARGET_NAME}
...@@ -231,7 +231,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ...@@ -231,7 +231,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
) )
ENDFUNCTION() ENDFUNCTION()
SET(PROTOBUF_VERSION 3.1) SET(PROTOBUF_VERSION 3.6.1)
IF(NOT PROTOBUF_FOUND) IF(NOT PROTOBUF_FOUND)
build_protobuf(extern_protobuf FALSE) build_protobuf(extern_protobuf FALSE)
......
...@@ -74,8 +74,8 @@ IF(PYTHONINTERP_FOUND) ...@@ -74,8 +74,8 @@ IF(PYTHONINTERP_FOUND)
find_python_module(wheel REQUIRED) find_python_module(wheel REQUIRED)
find_python_module(google.protobuf REQUIRED) find_python_module(google.protobuf REQUIRED)
FIND_PACKAGE(NumPy REQUIRED) FIND_PACKAGE(NumPy REQUIRED)
IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.6.1")
MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, " MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.6.1, "
"please use pip to upgrade protobuf. pip install -U protobuf") "please use pip to upgrade protobuf. pip install -U protobuf")
ENDIF() ENDIF()
ENDIF(PYTHONINTERP_FOUND) ENDIF(PYTHONINTERP_FOUND)
......
...@@ -262,7 +262,7 @@ paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=N ...@@ -262,7 +262,7 @@ paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=N
paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None)) paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords='ignored', defaults=(None,)) paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
...@@ -474,11 +474,11 @@ paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_ ...@@ -474,11 +474,11 @@ paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_
paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]] paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
paddle.fluid.LoDTensor.recursive_sequence_lengths recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> List[List[int]] paddle.fluid.LoDTensor.recursive_sequence_lengths recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
paddle.fluid.LoDTensor.set 1. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CPUPlace) -> None 2. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CPUPlace) -> None 3. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CPUPlace) -> None 4. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CPUPlace) -> None 5. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CPUPlace) -> None 6. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CPUPlace) -> None 7. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CPUPlace) -> None 8. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CPUPlace) -> None 9. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPlace) -> None 10. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPlace) -> None 11. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPlace) -> None 12. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPlace) -> None 13. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPlace) -> None 14. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPlace) -> None 15. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPlace) -> None 16. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPlace) -> None 17. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPinnedPlace) -> None 18. 
set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPinnedPlace) -> None 19. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPinnedPlace) -> None 20. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPinnedPlace) -> None 21. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPinnedPlace) -> None 22. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPinnedPlace) -> None 23. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPinnedPlace) -> None 24. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPinnedPlace) -> None paddle.fluid.LoDTensor.set 1. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CPUPlace) -> None 2. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CPUPlace) -> None 3. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CPUPlace) -> None 4. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CPUPlace) -> None 5. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CPUPlace) -> None 6. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CPUPlace) -> None 7. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CPUPlace) -> None 8. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CPUPlace) -> None 9. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPlace) -> None 10. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPlace) -> None 11. 
set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPlace) -> None 12. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPlace) -> None 13. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPlace) -> None 14. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPlace) -> None 15. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPlace) -> None 16. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPlace) -> None 17. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPinnedPlace) -> None 18. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPinnedPlace) -> None 19. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPinnedPlace) -> None 20. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPinnedPlace) -> None 21. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPinnedPlace) -> None 22. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPinnedPlace) -> None 23. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPinnedPlace) -> None 24. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPinnedPlace) -> None
paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor, lod: List[List[int]]) -> None
paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor, recursive_sequence_lengths: List[List[int]]) -> None
paddle.fluid.LoDTensor.shape shape(self: paddle.fluid.core.Tensor) -> List[int] paddle.fluid.LoDTensor.shape shape(self: paddle.fluid.core.Tensor) -> List[int]
paddle.fluid.LoDTensorArray.__init__ __init__(self: paddle.fluid.core.LoDTensorArray) -> None paddle.fluid.LoDTensorArray.__init__ __init__(self: paddle.fluid.core.LoDTensorArray) -> None
paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray, arg0: paddle.fluid.core.LoDTensor) -> None paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray, tensor: paddle.fluid.core.LoDTensor) -> None
paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
......
...@@ -50,7 +50,12 @@ cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_ ...@@ -50,7 +50,12 @@ cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_
cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor) cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor)
cc_library(fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope) cc_library(fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope)
cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper) if(WITH_GPU)
cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info)
else()
cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper cpu_info)
endif()
cc_library(memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass) cc_library(memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass)
cc_library(inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info) cc_library(inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info)
cc_library(modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper) cc_library(modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper)
......
...@@ -240,7 +240,9 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply( ...@@ -240,7 +240,9 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
continue; continue;
} }
} }
VLOG(3) << "Start Apply Pass " << pass->Type();
graph = pass->Apply(std::move(graph)); graph = pass->Apply(std::move(graph));
VLOG(3) << "Finish Apply Pass " << pass->Type();
} }
return graph; return graph;
} }
......
...@@ -49,7 +49,7 @@ DEFINE_bool( ...@@ -49,7 +49,7 @@ DEFINE_bool(
"If this option turns on, only these op in whitelist can be inplaced." "If this option turns on, only these op in whitelist can be inplaced."
"If it turns off, all of the running op can be candidate of inplaced op." "If it turns off, all of the running op can be candidate of inplaced op."
"Such as scale, elementwise_add" "Such as scale, elementwise_add"
"By default, it's turned on"); "By default, it's turned off");
DECLARE_string(memory_optimize_debug); DECLARE_string(memory_optimize_debug);
......
...@@ -13,13 +13,19 @@ ...@@ -13,13 +13,19 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/details/memory_optimize_helper.h" #include "paddle/fluid/framework/details/memory_optimize_helper.h"
#include <algorithm>
#include <deque> #include <deque>
#include <functional> #include <functional>
#include <iostream> #include <iterator>
#include <numeric> #include <numeric>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/platform/cpu_info.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/gpu_info.h"
#endif // PADDLE_WITH_CUDA
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -166,6 +172,11 @@ struct NodeComparator { ...@@ -166,6 +172,11 @@ struct NodeComparator {
bool operator()(ir::Node* lhs, ir::Node* rhs) const { bool operator()(ir::Node* lhs, ir::Node* rhs) const {
auto* lhs_desc = FindVarDescInBlock(lhs); auto* lhs_desc = FindVarDescInBlock(lhs);
auto* rhs_desc = FindVarDescInBlock(rhs); auto* rhs_desc = FindVarDescInBlock(rhs);
// match data type
if (lhs_desc->GetDataType() != rhs_desc->GetDataType()) {
return false;
}
// match shape
auto lhs_shape = lhs_desc->GetShape(); auto lhs_shape = lhs_desc->GetShape();
auto rhs_shape = rhs_desc->GetShape(); auto rhs_shape = rhs_desc->GetShape();
if ((lhs_shape[0] == -1 && rhs_shape[0] == -1) || if ((lhs_shape[0] == -1 && rhs_shape[0] == -1) ||
...@@ -230,6 +241,27 @@ ir::Node* OrderedSet::FindBestFitNode(ir::Node* var) const { ...@@ -230,6 +241,27 @@ ir::Node* OrderedSet::FindBestFitNode(ir::Node* var) const {
return found_node; return found_node;
} }
ir::Node* OrderedSet::FindNextBestFitNode(ir::Node* var, ir::Node* prev) const {
ir::Node* found_node = nullptr;
NodeComparator functor;
auto it =
std::find_if(nodes_.begin(), nodes_.end(), [&](const NodeVector& v) {
if (v.front() == prev)
return true;
else
return false;
});
PADDLE_ENFORCE(it != nodes_.end(), "Not found previous in node list!");
for (it = std::next(it); it != nodes_.end(); ++it) {
auto& candidate = it->front();
if (functor(var, candidate)) {
found_node = candidate;
break;
}
}
return found_node;
}
bool OrderedSet::Has(ir::Node* var) const { bool OrderedSet::Has(ir::Node* var) const {
if (mark_table_.count(var->Name())) { if (mark_table_.count(var->Name())) {
auto& node_in_samename = mark_table_.at(var->Name()); auto& node_in_samename = mark_table_.at(var->Name());
...@@ -241,10 +273,15 @@ bool OrderedSet::Has(ir::Node* var) const { ...@@ -241,10 +273,15 @@ bool OrderedSet::Has(ir::Node* var) const {
return false; return false;
} }
void OrderedSet::Erase(const std::string& var) {
PADDLE_ENFORCE(mark_table_.count(var));
nodes_.erase(mark_table_[var]);
mark_table_.erase(var);
}
void OrderedSet::Erase(ir::Node* var) { void OrderedSet::Erase(ir::Node* var) {
PADDLE_ENFORCE(mark_table_.count(var->Name())); PADDLE_ENFORCE(var != nullptr);
nodes_.erase(mark_table_[var->Name()]); Erase(var->Name());
mark_table_.erase(var->Name());
} }
std::string OrderedSet::ToString() const { std::string OrderedSet::ToString() const {
...@@ -274,14 +311,35 @@ bool NodeCanReused(ir::Node* node) { ...@@ -274,14 +311,35 @@ bool NodeCanReused(ir::Node* node) {
return flag; return flag;
} }
int MinChunkSize() {
int size{0};
#ifdef PADDLE_WITH_CUDA
size = platform::GpuMinChunkSize();
#else
size = platform::CpuMinChunkSize();
#endif // PADDLE_WITH_CUDA
return size;
}
bool NodeCanReused(const VarDesc& node) { bool NodeCanReused(const VarDesc& node) {
auto type = node.GetType(); auto type = node.GetType();
// only these types holds bulk of gpu memory
if (!(type == proto::VarType::LOD_TENSOR || if (!(type == proto::VarType::LOD_TENSOR ||
type == proto::VarType::SELECTED_ROWS || type == proto::VarType::SELECTED_ROWS ||
type == proto::VarType::LOD_TENSOR_ARRAY)) { type == proto::VarType::LOD_TENSOR_ARRAY)) {
return false; return false;
} }
if (node.Persistable() || node.GetShape().empty()) { // persistable variable is parameter
if (node.Persistable()) {
return false;
}
// shape < min_chunk_size is meaningless.
// further more, fetched loss always has size = 1
// which should not be reused.
auto shape = node.GetShape();
int size = std::abs(
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()));
if (shape.empty() || size < MinChunkSize()) {
return false; return false;
} }
// vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad // vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
...@@ -461,7 +519,9 @@ ir::Node* ControlFlowGraph::GetNodeByName(const std::string& name, ...@@ -461,7 +519,9 @@ ir::Node* ControlFlowGraph::GetNodeByName(const std::string& name,
for (auto* node : ops_) { for (auto* node : ops_) {
if (node == op) break; if (node == op) break;
for (auto& output : node->outputs) { for (auto& output : node->outputs) {
if (output->Name() == name) { PADDLE_ENFORCE((output != nullptr && output->IsVar()),
"Output is empty!");
if (output->Var() && output->Name() == name) {
found_node = output; found_node = output;
} }
} }
......
...@@ -55,6 +55,7 @@ class OrderedSet { ...@@ -55,6 +55,7 @@ class OrderedSet {
void Insert(ir::Node* var); void Insert(ir::Node* var);
void Erase(ir::Node* var); void Erase(ir::Node* var);
void Erase(const std::string& var);
bool Has(ir::Node* var) const; bool Has(ir::Node* var) const;
void Clear() { void Clear() {
mark_table_.clear(); mark_table_.clear();
...@@ -62,6 +63,7 @@ class OrderedSet { ...@@ -62,6 +63,7 @@ class OrderedSet {
} }
// find the bestfit shape node block with var. // find the bestfit shape node block with var.
ir::Node* FindBestFitNode(ir::Node* var) const; ir::Node* FindBestFitNode(ir::Node* var) const;
ir::Node* FindNextBestFitNode(ir::Node* var, ir::Node* prev) const;
// map store non-const iterator, can not promise const // map store non-const iterator, can not promise const
int GetNodeIndexInPool(ir::Node* var); int GetNodeIndexInPool(ir::Node* var);
// pool all node to string // pool all node to string
......
...@@ -107,6 +107,52 @@ TEST(OrderedSet, Normal) { ...@@ -107,6 +107,52 @@ TEST(OrderedSet, Normal) {
ASSERT_EQ(pool.GetNodeIndexInPool(cache), 5); // match 4:[5,2] ASSERT_EQ(pool.GetNodeIndexInPool(cache), 5); // match 4:[5,2]
} }
} }
TEST(OrderedSet, FindBestFitNode) {
OrderedSet pool;
std::vector<std::unique_ptr<ir::Node>> nodes;
ProgramDesc prog;
BlockDesc* block_desc = prog.MutableBlock(0);
auto* op_desc = block_desc->AppendOp();
op_desc->SetType("dummy");
std::unique_ptr<ir::Node> op = ir::CreateNodeForTest(op_desc);
{
auto desc = block_desc->Var("a");
desc->SetShape({128, 128});
std::unique_ptr<ir::Node> node = ir::CreateNodeForTest(desc);
node->inputs.emplace_back(op.get());
nodes.emplace_back(std::move(node));
}
{
auto desc = block_desc->Var("b");
desc->SetShape({128, 129});
std::unique_ptr<ir::Node> node = ir::CreateNodeForTest(desc);
node->inputs.emplace_back(op.get());
nodes.emplace_back(std::move(node));
}
{
auto desc = block_desc->Var("c");
desc->SetShape({128, 128});
std::unique_ptr<ir::Node> node = ir::CreateNodeForTest(desc);
node->inputs.emplace_back(op.get());
nodes.emplace_back(std::move(node));
}
for (auto& node : nodes) {
pool.Insert(node.get());
}
// FindNextBestFitNode
auto* n = nodes[0].get();
auto* cache = pool.FindBestFitNode(n);
PADDLE_ENFORCE(cache->Name() == "a");
cache = pool.FindNextBestFitNode(n, cache);
PADDLE_ENFORCE(cache->Name() == "c");
cache = pool.FindNextBestFitNode(n, cache);
PADDLE_ENFORCE(cache->Name() == "b");
}
} // namespace details } // namespace details
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
......
...@@ -69,55 +69,59 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl( ...@@ -69,55 +69,59 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
} }
for (auto& var : op->outputs) { for (auto& var : op->outputs) {
if (!NodeCanReused(var) || cfg_->Use(op).count(var->Name()) == 0 || if (var->IsVar() && !var->IsCtrlVar() && skip_set_.count(var->Name())) {
skip_set_.count(var->Name())) VLOG(3) << "Skip set contains variable of " << var->Name()
<< "disable reuse on it. skipped";
continue; continue;
ir::Node* cache = pool_.FindBestFitNode(var);
if (var->Name() == FLAGS_memory_optimize_debug) {
VLOG(3) << "start match var " << DebugString(var) << " of op "
<< op->Name();
VLOG(3) << pool_.ToString();
VLOG(3) << "matched in pool : "
<< ((cache == nullptr) ? "False" : "True");
} }
if (NodeCanReused(var) && cfg_->Use(op).count(var->Name()) == 0) {
ir::Node* cache = pool_.FindBestFitNode(var);
while (cache != nullptr && var->Name() == cache->Name()) {
VLOG(3) << "The same cache variable is cascade reused. "
<< cache->Name() << " is re-filled to the pool after "
<< "the reused op is finished. Current op can not "
<< "replace it again. Skip this candidate.";
cache = pool_.FindNextBestFitNode(var, cache);
}
if (var->Name() == FLAGS_memory_optimize_debug) {
VLOG(3) << "start match var " << DebugString(var) << " of op "
<< op->Name();
VLOG(3) << pool_.ToString();
VLOG(3) << "matched in pool : "
<< ((cache == nullptr) ? "False" : "True");
}
if (cache == nullptr) continue; if (cache != nullptr) {
if (var->Name() == cache->Name()) { int node_idx_in_pool = pool_.GetNodeIndexInPool(cache);
VLOG(3) << "The same cache variable is cascade reused." << var->Name() VLOG(3) << string::Sprintf(
<< " is re-filled to the pool after" "!!! %s, %s => %s, cache idx %d, pool size %d",
<< "the reused op is finished. Current op can not " std::to_string(reuse_id++), DebugString(var), DebugString(cache),
<< "replace it again. Skip this candidate."; node_idx_in_pool, static_cast<int>(pool_.size()));
continue; // NOTE(dzhwinter): update the ProgramDesc/IR Graph
// and the CFG Graph on the fly.
int node_idx_in_pool = pool_.GetNodeIndexInPool(cache); //
VLOG(3) << string::Sprintf( // IR Graph define the dependence relationship between nodes.
"!!! %s, %s => %s, cache idx %d, pool size %d", //
std::to_string(reuse_id++), DebugString(var), DebugString(cache), // ProgramDesc defines the input/output vars. Its used in
node_idx_in_pool, static_cast<int>(pool_.size())); // CreateOp, CreateVar when running happens.
//
// update CFG Graph on the fly. // CFG Graph store the liveness information, when reuse happens
// reused var maybe re-fill into the pool // we also need to update the variable liveness.
cfg_->RenameVarInCFGGraph(var->Name(), cache->Name(), idx); const std::string var_name = var->Name();
// NOTE(dzhwinter): we need to both update the ProgramDesc const std::string cache_name = cache->Name();
// and IR Graph. because op_desc/var_desc is used in CreateOp,
// CreateVar when running happens. But IR Graph
// define the dependence relationship between nodes.
RenameVarInGraphDesc(var->Name(), cache->Name(), idx);
RenameVarInGraphNode(var->Name(), cache->Name(), idx, graph.get());
pool_.Erase(cache);
}
// fill the pool cfg_->RenameVarInCFGGraph(var_name, cache_name, idx);
std::unordered_set<std::string> unlived_vars; RenameVarInGraphDesc(var_name, cache_name, idx);
for (auto var : cfg_->LiveIn(op)) { RenameVarInGraphNode(var_name, cache_name, idx, graph.get());
if (cfg_->LiveOut(op).count(var) == 0) { pool_.Erase(cache_name);
unlived_vars.emplace(var);
} }
} }
for (auto var : unlived_vars) { }
// fill the pool
for (auto var : cfg_->LiveIn(op)) {
if (cfg_->LiveOut(op).count(var) == 0) {
ir::Node* var_node = cfg_->GetNodeByName(var, op); ir::Node* var_node = cfg_->GetNodeByName(var, op);
if (var_node == nullptr || var_node->IsCtrlVar()) continue;
if (NodeCanReused(var_node) && !pool_.Has(var_node)) { if (NodeCanReused(var_node) && !pool_.Has(var_node)) {
pool_.Insert(var_node); pool_.Insert(var_node);
} }
...@@ -273,8 +277,7 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var, ...@@ -273,8 +277,7 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
// redirect the input to the latest version of cache_var // redirect the input to the latest version of cache_var
for (auto* node : op->inputs) { for (auto* node : op->inputs) {
if (node->Name() == var) { if (node->Name() == var) {
ir::Node* cache_node = graph->CreateVarNode(var_desc.get()); ir::Node* cache_node = var_nodes_[cache_var].back();
var_nodes_[cache_var].emplace_back(cache_node);
// swap node to cache_node // swap node to cache_node
cache_node->outputs.insert(cache_node->outputs.end(), cache_node->outputs.insert(cache_node->outputs.end(),
...@@ -283,11 +286,15 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var, ...@@ -283,11 +286,15 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
auto* prev_op = node->inputs[0]; auto* prev_op = node->inputs[0];
std::replace(prev_op->outputs.begin(), prev_op->outputs.end(), node, std::replace(prev_op->outputs.begin(), prev_op->outputs.end(), node,
cache_node); cache_node);
cache_node->inputs.emplace_back(prev_op);
for (auto* next_op : node->outputs) { for (auto* next_op : node->outputs) {
std::replace(next_op->inputs.begin(), next_op->inputs.end(), node, std::replace(next_op->inputs.begin(), next_op->inputs.end(), node,
cache_node); cache_node);
} }
// erase unused node
auto& nodes = var_nodes_.at(var);
nodes.erase(std::remove(nodes.begin(), nodes.end(), node), nodes.end());
graph->RemoveNode(node);
} }
} }
...@@ -307,15 +314,14 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var, ...@@ -307,15 +314,14 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
std::replace(next_op->inputs.begin(), next_op->inputs.end(), node, std::replace(next_op->inputs.begin(), next_op->inputs.end(), node,
cache_node); cache_node);
} }
// erase unused node
auto& nodes = var_nodes_.at(var);
nodes.erase(std::remove(nodes.begin(), nodes.end(), node), nodes.end());
graph->RemoveNode(node);
} }
} }
} }
// release node of unused var in graph
for (auto* node : var_nodes_[var]) {
graph->RemoveNode(node);
}
var_nodes_.at(var).clear();
} }
} // namespace details } // namespace details
......
...@@ -179,11 +179,11 @@ TEST(InferInplace, SingleOpInplaceInToOut) { ...@@ -179,11 +179,11 @@ TEST(InferInplace, SingleOpInplaceInToOut) {
op->SetOutput("Out", {"test2_out"}); op->SetOutput("Out", {"test2_out"});
prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 64}); prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128});
prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(0)->Var("test2_out"); prog.MutableBlock(0)->Var("test2_out");
prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16}); prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 128, 128});
auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_;
auto in_to_outs = infer_inplace(*op, op->Block()); auto in_to_outs = infer_inplace(*op, op->Block());
...@@ -201,11 +201,11 @@ TEST(InferInplace, SingleGradOpInplaceInToOut) { ...@@ -201,11 +201,11 @@ TEST(InferInplace, SingleGradOpInplaceInToOut) {
op->SetOutput(GradVarName("X"), {"test2_a", "test2_b", "test2_c"}); op->SetOutput(GradVarName("X"), {"test2_a", "test2_b", "test2_c"});
prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 16}); prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR);
prog.MutableBlock(0)->Var("test2_out"); prog.MutableBlock(0)->Var("test2_out");
prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16}); prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 1024, 1024});
auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_;
auto in_to_outs = infer_inplace(*op, op->Block()); auto in_to_outs = infer_inplace(*op, op->Block());
...@@ -233,12 +233,12 @@ TEST(InferInplace, MultiOutInplaceInToOut) { ...@@ -233,12 +233,12 @@ TEST(InferInplace, MultiOutInplaceInToOut) {
prog.MutableBlock(0)->Var("o0"); prog.MutableBlock(0)->Var("o0");
prog.MutableBlock(0)->Var("y0"); prog.MutableBlock(0)->Var("y0");
prog.MutableBlock(0)->Var("z0"); prog.MutableBlock(0)->Var("z0");
prog.MutableBlock(0)->Var("a0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("b0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("c0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("o0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("y0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("z0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024});
auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_;
auto in_to_outs = infer_inplace(*op, op->Block()); auto in_to_outs = infer_inplace(*op, op->Block());
...@@ -267,12 +267,12 @@ TEST(InferInplace, MultiGradInplaceInToOut) { ...@@ -267,12 +267,12 @@ TEST(InferInplace, MultiGradInplaceInToOut) {
prog.MutableBlock(0)->Var("o0"); prog.MutableBlock(0)->Var("o0");
prog.MutableBlock(0)->Var("y0"); prog.MutableBlock(0)->Var("y0");
prog.MutableBlock(0)->Var("z0"); prog.MutableBlock(0)->Var("z0");
prog.MutableBlock(0)->Var("a0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("b0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("c0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("o0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("y0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024});
prog.MutableBlock(0)->Var("z0")->SetShape({32, 16}); prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024});
auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_;
auto in_to_outs = infer_inplace(*op, op->Block()); auto in_to_outs = infer_inplace(*op, op->Block());
......
...@@ -51,6 +51,11 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -51,6 +51,11 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker {
comment.type)); comment.type));
AddInput("Y", string::Sprintf("the right hand operand of %s operator", AddInput("Y", string::Sprintf("the right hand operand of %s operator",
comment.type)); comment.type));
AddAttr<int>(
"axis",
"The start dimension index for broadcasting Y onto X. [default -1]")
.SetDefault(-1)
.EqualGreaterThan(-1);
AddAttr<bool>("force_cpu", AddAttr<bool>("force_cpu",
"Force fill output variable to cpu " "Force fill output variable to cpu "
"memory. Otherwise, fill output variable to the running " "memory. Otherwise, fill output variable to the running "
...@@ -64,11 +69,6 @@ N-dim tensor. X and Y could be any type. The each element of the Out tensor is ...@@ -64,11 +69,6 @@ N-dim tensor. X and Y could be any type. The each element of the Out tensor is
calculated by $%s$ calculated by $%s$
)DOC", )DOC",
comment.equation)); comment.equation));
AddAttr<int>(
"axis",
"The start dimension index for broadcasting Y onto X. [default -1]")
.SetDefault(-1)
.EqualGreaterThan(-1);
} }
}; };
......
...@@ -21,6 +21,7 @@ limitations under the License. */ ...@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/blas.h"
namespace paddle { namespace paddle {
...@@ -37,32 +38,24 @@ struct EmbeddingVSumFunctor { ...@@ -37,32 +38,24 @@ struct EmbeddingVSumFunctor {
const LoDTensor *table_t, const LoDTensor *ids_t, const LoDTensor *table_t, const LoDTensor *ids_t,
LoDTensor *output_t) { LoDTensor *output_t) {
auto *table = table_t->data<T>(); auto *table = table_t->data<T>();
int64_t row_number = table_t->dims()[0]; int64_t table_height = table_t->dims()[0];
int64_t row_width = table_t->dims()[1]; int64_t table_width = table_t->dims()[1];
int64_t last_dim = output_t->dims()[1]; int64_t out_width = output_t->dims()[1];
const int64_t *ids = ids_t->data<int64_t>(); const int64_t *ids = ids_t->data<int64_t>();
auto ids_lod = ids_t->lod()[0]; auto ids_lod = ids_t->lod()[0];
int64_t ids_count = ids_t->numel() / ids_lod.back(); int64_t idx_width = ids_t->numel() / ids_lod.back();
auto *output = output_t->mutable_data<T>(context.GetPlace()); auto *output = output_t->mutable_data<T>(context.GetPlace());
auto blas = math::GetBlas<platform::CPUDeviceContext, T>(context); PADDLE_ENFORCE_LE(table_width * idx_width, out_width);
for (int64_t i = 0; i != ids_lod.size() - 1; ++i) {
size_t begin = ids_lod[i] * ids_count;
for (int64_t j = 0; j != ids_count; ++j) {
PADDLE_ENFORCE_LT(ids[begin], row_number);
PADDLE_ENFORCE_GE(ids[begin], 0, "ids %d", i);
blas.VCOPY(row_width, table + ids[begin + j] * row_width,
output + i * last_dim + j * row_width);
}
for (int64_t r = (ids_lod[i] + 1) * ids_count; jit::emb_seq_pool_attr_t attr(table_height, table_width, 0, idx_width,
r < ids_lod[i + 1] * ids_count; ++r) { out_width, jit::SeqPoolType::kSum);
PADDLE_ENFORCE_LT(ids[r], row_number); for (int64_t i = 0; i != ids_lod.size() - 1; ++i) {
PADDLE_ENFORCE_GE(ids[r], 0, "ids %d", i); attr.index_height = ids_lod[i + 1] - ids_lod[i];
blas.AXPY(row_width, 1., table + ids[r] * row_width, auto emb_seqpool = jit::Get<jit::kEmbSeqPool, jit::EmbSeqPoolTuples<T>,
output + i * last_dim + (r % ids_count) * row_width); platform::CPUPlace>(attr);
} emb_seqpool(table, ids + ids_lod[i] * idx_width, output + i * out_width,
&attr);
} }
} }
}; };
......
...@@ -301,6 +301,37 @@ void BenchSeqPoolKernel() { ...@@ -301,6 +301,37 @@ void BenchSeqPoolKernel() {
} }
} }
template <jit::KernelType KT, typename T, typename PlaceType>
void BenchEmbSeqPoolKernel() {
std::vector<jit::SeqPoolType> pool_types = {jit::SeqPoolType::kSum};
int64_t tbl_h = 1e4;
for (int tbl_w : {10, 16, 256}) {
Tensor table;
table.Resize({tbl_h, tbl_w});
RandomVec<T>(tbl_h * tbl_w, table.mutable_data<T>(PlaceType()), -2.f, 2.f);
const T* table_data = table.data<T>();
for (auto type : pool_types) {
for (int idx_w : {1, 2, 10, 16}) {
for (int idx_h : {1, 2, 9, 13, 16}) {
int64_t out_w = tbl_w * idx_w;
jit::emb_seq_pool_attr_t attr(tbl_h, tbl_w, idx_h, idx_w, out_w,
type);
Tensor idx, out;
idx.Resize({idx_h, idx_w});
out.Resize({out_w});
RandomVec<int64_t>(idx_h * idx_w,
idx.mutable_data<int64_t>(PlaceType()), 0,
tbl_h - 1);
const int64_t* idx_data = idx.data<int64_t>();
T* o_data = out.mutable_data<T>(PlaceType());
BenchAllImpls<KT, jit::EmbSeqPoolTuples<T>, PlaceType>(
attr, table_data, idx_data, o_data, &attr);
}
}
}
}
}
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void BenchMatMulKernel() { void BenchMatMulKernel() {
for (int m : {1, 2, 3, 4}) { for (int m : {1, 2, 3, 4}) {
...@@ -441,6 +472,11 @@ BENCH_FP32_CPU(kGRUHtPart2) { BenchGRUKernel<jit::kGRUHtPart2, T, CPUPlace>(); } ...@@ -441,6 +472,11 @@ BENCH_FP32_CPU(kGRUHtPart2) { BenchGRUKernel<jit::kGRUHtPart2, T, CPUPlace>(); }
// seq pool function // seq pool function
BENCH_FP32_CPU(kSeqPool) { BenchSeqPoolKernel<jit::kSeqPool, T, CPUPlace>(); } BENCH_FP32_CPU(kSeqPool) { BenchSeqPoolKernel<jit::kSeqPool, T, CPUPlace>(); }
// embedding seq pool function (fp32, CPU)
BENCH_FP32_CPU(kEmbSeqPool) {
  BenchEmbSeqPoolKernel<jit::kEmbSeqPool, T, CPUPlace>();
}
// matmul // matmul
BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel<jit::kMatMul, T, CPUPlace>(); } BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel<jit::kMatMul, T, CPUPlace>(); }
......
...@@ -31,3 +31,4 @@ USE_JITKERNEL_GEN(kNCHW16CMulNC) ...@@ -31,3 +31,4 @@ USE_JITKERNEL_GEN(kNCHW16CMulNC)
USE_JITKERNEL_GEN(kSeqPool) USE_JITKERNEL_GEN(kSeqPool)
USE_JITKERNEL_GEN(kHMax) USE_JITKERNEL_GEN(kHMax)
USE_JITKERNEL_GEN(kHSum) USE_JITKERNEL_GEN(kHSum)
USE_JITKERNEL_GEN(kEmbSeqPool)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#include "paddle/fluid/operators/jit/gen/embseqpool.h"
#include <stddef.h> // offsetof
#include <vector>
#include "paddle/fluid/operators/jit/gen/act.h" // for exp_float_consts ones
#include "paddle/fluid/operators/jit/registry.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace paddle {
namespace operators {
namespace jit {
namespace gen {
// Emits AVX code computing, for each index column w, the element-wise sum of
// the table rows selected by idx[:, w], written to dst row w. The table row
// (tbl_w_ floats) is processed in ymm-sized blocks, batched into groups of at
// most 8 so each group's accumulators stay resident in registers.
void EmbSeqPoolJitCode::genCode() {
  preCode();
  constexpr int block = YMM_FLOAT_BLOCK;
  constexpr int max_num_regs = 8;
  const int num_block = tbl_w_ / block;
  const int num_groups = num_block / max_num_regs;
  const size_t block_size = sizeof(float) * block;
  std::vector<int> groups(num_groups, max_num_regs);
  int rest_num_regs = num_block % max_num_regs;
  if (rest_num_regs > 0) {
    groups.push_back(rest_num_regs);
  }

  // protect param_dst: mul clobbers rdx (see header note on r13)
  mov(reg_ptr_param_dst, param_dst);
  mov(reg_idx_width_in_byte,
      qword[param_attr + offsetof(emb_seq_pool_attr_t, index_width)]);
  mov(reg_idx_height,
      qword[param_attr + offsetof(emb_seq_pool_attr_t, index_height)]);
  // Convert index_width from elements to bytes (mul leaves product in rax).
  mov(rax, sizeof(int64_t));
  mul(reg_idx_width_in_byte);
  mov(reg_idx_width_in_byte, rax);
  const size_t tbl_width_in_byte = sizeof(float) * tbl_w_;
  int acc_num_regs = 0;
  for (int num_regs : groups) {
    Label l_next_idx_w, l_next_idx_h, l_save_now;
    xor_(reg_idx_w_i_in_byte, reg_idx_w_i_in_byte);
    mov(reg_ptr_dst_i, reg_ptr_param_dst);
    add(reg_ptr_dst_i, acc_num_regs * block_size);

    L(l_next_idx_w);
    {
      // h == 0: load the first indexed table row into the accumulator
      // registers ymm[num_regs .. 2*num_regs).
      mov(reg_ptr_idx_i, param_idx);
      add(reg_ptr_idx_i, reg_idx_w_i_in_byte);
      mov(reg_idx, qword[reg_ptr_idx_i]);
      mov(rax, tbl_width_in_byte);
      mul(reg_idx);
      mov(reg_ptr_tbl_i, rax);        // reg is offset now
      add(reg_ptr_tbl_i, param_tbl);  // reg is ptr_i now
      size_t w_offset = 0;
      for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
        vmovups(ymm_t(reg_i + num_regs), ptr[reg_ptr_tbl_i + w_offset]);
        w_offset += block_size;
      }
      add(reg_ptr_idx_i, reg_idx_width_in_byte);

      // end condition of idx h:
      // param_idx + idx_w_i + index_height * index_width_in_byte
      mov(reg_idx_h_end, reg_idx_height);
      mov(rax, reg_idx_width_in_byte);
      mul(reg_idx_h_end);
      mov(reg_idx_h_end, rax);
      add(reg_idx_h_end, reg_idx_w_i_in_byte);
      add(reg_idx_h_end, param_idx);

      cmp(reg_ptr_idx_i, reg_idx_h_end);
      jge(l_save_now, T_NEAR);
      L(l_next_idx_h);
      {
        // Accumulate the table row selected by the current index value.
        // NOTE(review): a `mov(reg_ptr_tbl_i, reg_idx)` that used to follow
        // the load below was removed -- reg_ptr_tbl_i and reg_idx are both
        // r10 (see header), so it was a self-move no-op.
        mov(reg_idx, qword[reg_ptr_idx_i]);
        mov(rax, tbl_width_in_byte);
        mul(reg_idx);
        mov(reg_ptr_tbl_i, rax);
        add(reg_ptr_tbl_i, param_tbl);
        size_t w_offset = 0;
        for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
          vmovups(ymm_t(reg_i), ptr[reg_ptr_tbl_i + w_offset]);
          vaddps(ymm_t(reg_i + num_regs), ymm_t(reg_i + num_regs),
                 ymm_t(reg_i));
          w_offset += block_size;
        }
        add(reg_ptr_idx_i, reg_idx_width_in_byte);
        cmp(reg_ptr_idx_i, reg_idx_h_end);
        jl(l_next_idx_h, T_NEAR);
      }  // end of idx h

      L(l_save_now);
      // avg or sqrt here, if needed
      // Store the accumulators to the current output row.
      w_offset = 0;
      for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
        vmovups(ptr[reg_ptr_dst_i + w_offset], ymm_t(reg_i + num_regs));
        w_offset += block_size;
      }
      add(reg_ptr_dst_i, tbl_width_in_byte);
      add(reg_idx_w_i_in_byte, sizeof(int64_t));
      cmp(reg_idx_w_i_in_byte, reg_idx_width_in_byte);
      jl(l_next_idx_w, T_NEAR);
    }  // end of idx w

    acc_num_regs += num_regs;
    add(param_tbl, num_regs * block_size);  // do not use acc_num_regs
  }  // end of groups
  postCode();
}
// Creator for the AVX embedding-seq-pool JIT kernel.
class EmbSeqPoolCreator : public JitCodeCreator<emb_seq_pool_attr_t> {
 public:
  // Usable only with AVX, a table width that is a whole number of ymm float
  // blocks, and sum pooling. The kSum guard matters: EmbSeqPoolJitCode
  // LOG(FATAL)s on any other pool type, so rejecting non-sum attrs here lets
  // the framework fall back to another implementation instead of aborting.
  bool UseMe(const emb_seq_pool_attr_t& attr) const override {
    return platform::MayIUse(platform::avx) &&
           attr.pool_type == SeqPoolType::kSum &&
           attr.table_width % YMM_FLOAT_BLOCK == 0;
  }
  // Rough upper bound of the generated code size in bytes, proportional to
  // the number of ymm blocks per table row.
  size_t CodeSize(const emb_seq_pool_attr_t& attr) const override {
    return 96 + (attr.table_width / YMM_FLOAT_BLOCK) * 96 * 8;
  }
  std::unique_ptr<GenBase> CreateJitCode(
      const emb_seq_pool_attr_t& attr) const override {
    // All shape attributes must be positive before emitting code.
    PADDLE_ENFORCE_GT(attr.table_height, 0);
    PADDLE_ENFORCE_GT(attr.table_width, 0);
    PADDLE_ENFORCE_GT(attr.index_height, 0);
    PADDLE_ENFORCE_GT(attr.index_width, 0);
    PADDLE_ENFORCE_GT(attr.out_width, 0);
    return make_unique<EmbSeqPoolJitCode>(attr, CodeSize(attr));
  }
};
} // namespace gen
} // namespace jit
} // namespace operators
} // namespace paddle
namespace gen = paddle::operators::jit::gen;

// Register the AVX code generator for the embedding sequence pool kernel.
REGISTER_JITKERNEL_GEN(kEmbSeqPool, gen::EmbSeqPoolCreator);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#pragma once
#include <string>
#include "glog/logging.h"
#include "paddle/fluid/operators/jit/gen/jitcode.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
namespace jit {
namespace gen {
// Emits AVX code for the embedding sequence pool kernel: for each index
// column w, the output row w is the element-wise sum of the table rows
// selected by idx[:, w]. Only SeqPoolType::kSum is implemented; any other
// pool type aborts at construction.
class EmbSeqPoolJitCode : public JitCode {
 public:
  explicit EmbSeqPoolJitCode(const emb_seq_pool_attr_t& attr,
                             size_t code_size = 256 * 1024,
                             void* code_ptr = nullptr)
      : JitCode(code_size, code_ptr),
        tbl_w_(attr.table_width),
        type_(attr.pool_type) {
    // Only sum pooling is generated; see genCode().
    if (type_ != SeqPoolType::kSum) {
      LOG(FATAL) << "Only support sum pool yet ";
    }
    this->genCode();
  }

  // Human-readable kernel name, e.g. "EmbSeqPoolJitCode_Sum_W64".
  std::string name() const override {
    std::string base = "EmbSeqPoolJitCode";
    if (type_ == SeqPoolType::kSum) {
      base += "_Sum";
    } else if (type_ == SeqPoolType::kAvg) {
      base += "_Avg";
    } else if (type_ == SeqPoolType::kSqrt) {
      base += "_Sqrt";
    }
    base += ("_W" + std::to_string(tbl_w_));
    return base;
  }
  void genCode() override;

 private:
  int tbl_w_;         // embedding table width (floats per row)
  SeqPoolType type_;  // pooling type; only kSum is implemented
  // Kernel arguments, in order: (table, idx, dst, attr).
  reg64_t param_tbl{abi_param1};
  reg64_t param_idx{abi_param2};
  reg64_t param_dst{abi_param3};
  reg64_t param_attr{abi_param4};
  reg64_t reg_tmp{rax};
  reg64_t reg_idx_width_in_byte{r8};
  reg64_t reg_idx_height{r9};
  // NOTE: reg_ptr_tbl_i and reg_idx deliberately alias r10 -- they are
  // never live at the same time.
  reg64_t reg_ptr_tbl_i{r10};
  reg64_t reg_idx{r10};  // could use same of reg_ptr_tbl_i
  reg64_t reg_ptr_idx_i{r11};
  reg64_t reg_ptr_dst_i{r12};
  reg64_t reg_ptr_param_dst{r13};  // rdx is used in mul so protect param_dst
  reg64_t reg_idx_w_i_in_byte{r14};
  reg64_t reg_idx_h_end{r15};
};
} // namespace gen
} // namespace jit
} // namespace operators
} // namespace paddle
...@@ -32,7 +32,7 @@ class SeqPoolJitCode : public JitCode { ...@@ -32,7 +32,7 @@ class SeqPoolJitCode : public JitCode {
: JitCode(code_size, code_ptr), w_(attr.w), type_(attr.type) { : JitCode(code_size, code_ptr), w_(attr.w), type_(attr.type) {
if (!(type_ == SeqPoolType::kSum || type_ == SeqPoolType::kAvg || if (!(type_ == SeqPoolType::kSum || type_ == SeqPoolType::kAvg ||
type_ == SeqPoolType::kSqrt)) { type_ == SeqPoolType::kSqrt)) {
LOG(FATAL) << "Only support sum pool yet "; LOG(FATAL) << "Only supported pool type: sum, avg and sqrt.";
} }
fp_h_[0] = 1.f; fp_h_[0] = 1.f;
this->genCode(); this->genCode();
......
...@@ -54,6 +54,7 @@ const char* to_string(KernelType kt) { ...@@ -54,6 +54,7 @@ const char* to_string(KernelType kt) {
ONE_CASE(kHMax); ONE_CASE(kHMax);
ONE_CASE(kHSum); ONE_CASE(kHSum);
ONE_CASE(kSoftmax); ONE_CASE(kSoftmax);
ONE_CASE(kEmbSeqPool);
default: default:
PADDLE_THROW("Not support type: %d, or forget to add it.", kt); PADDLE_THROW("Not support type: %d, or forget to add it.", kt);
return "NOT JITKernel"; return "NOT JITKernel";
......
...@@ -172,6 +172,15 @@ inline std::ostream& operator<<(std::ostream& os, const seq_pool_attr_t& attr) { ...@@ -172,6 +172,15 @@ inline std::ostream& operator<<(std::ostream& os, const seq_pool_attr_t& attr) {
return os; return os;
} }
// Pretty-prints an emb_seq_pool_attr_t as name[value] pairs for logging.
inline std::ostream& operator<<(std::ostream& os,
                                const emb_seq_pool_attr_t& attr) {
  os << "table_height[" << attr.table_height << "]";
  os << ",table_width[" << attr.table_width << "]";
  os << ",index_height[" << attr.index_height << "]";
  os << ",index_width[" << attr.index_width << "]";
  os << ",output_width[" << attr.out_width << "]";
  os << ",pool_type[" << to_string(attr.pool_type) << "]";
  return os;
}
inline std::ostream& operator<<(std::ostream& os, const matmul_attr_t& attr) { inline std::ostream& operator<<(std::ostream& os, const matmul_attr_t& attr) {
os << "M[" << attr.m << "],N[" << attr.n << "],K[" << attr.k << "]"; os << "M[" << attr.m << "],N[" << attr.n << "],K[" << attr.k << "]";
return os; return os;
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
* limitations under the License. */ * limitations under the License. */
#pragma once #pragma once
#include <cstdint>
#include "paddle/fluid/operators/jit/macro.h" #include "paddle/fluid/operators/jit/macro.h"
#include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/macros.h"
...@@ -20,34 +21,35 @@ namespace paddle { ...@@ -20,34 +21,35 @@ namespace paddle {
namespace operators { namespace operators {
namespace jit { namespace jit {
// TODO(TJ): reorder by alphabet
typedef enum { typedef enum {
kNone = 0, kNone = 0,
kVMul = 1, // sort by alphabet
kVAdd = 2, kCRFDecoding = 1,
kVAddRelu, kEmbSeqPool = 2,
kVSub,
kVScal,
kVAddBias,
kVRelu,
kVIdentity,
kVSquare,
kVExp,
kVSigmoid,
kVTanh,
kLSTMCtHt,
kLSTMC1H1,
kGRUH1, kGRUH1,
kGRUHtPart1, kGRUHtPart1,
kGRUHtPart2, kGRUHtPart2,
kCRFDecoding, kHSum, // horizontal max
kHMax, // horizontal sum
kLSTMCtHt,
kLSTMC1H1,
kLayerNorm, kLayerNorm,
kMatMul,
kNCHW16CMulNC, kNCHW16CMulNC,
kSeqPool, kSeqPool,
kMatMul,
kHSum, // horizontal max
kHMax, // horizontal sum
kSoftmax, kSoftmax,
kVAdd,
kVAddBias,
kVAddRelu,
kVExp,
kVIdentity,
kVMul,
kVRelu,
kVScal,
kVSigmoid,
kVSquare,
kVSub,
kVTanh,
} KernelType; } KernelType;
typedef enum { typedef enum {
...@@ -145,6 +147,32 @@ struct SeqPoolTuples { ...@@ -145,6 +147,32 @@ struct SeqPoolTuples {
typedef void (*func_type)(const T*, T*, const seq_pool_attr_t*); typedef void (*func_type)(const T*, T*, const seq_pool_attr_t*);
}; };
// Attributes of one embedding-sequence-pool call: the table is
// (table_height x table_width), the index matrix is
// (index_height x index_width), and the output is a single row of
// out_width = table_width * index_width (enforced by the kernels).
typedef struct emb_seq_pool_attr_s {
  int64_t table_height, table_width;
  int64_t index_height, index_width;
  int64_t out_width;
  SeqPoolType pool_type;
  emb_seq_pool_attr_s() = default;
  explicit emb_seq_pool_attr_s(int64_t tbl_height, int64_t tbl_width,
                               int64_t idx_height, int64_t idx_width,
                               int64_t output_width,
                               SeqPoolType seqpool_type = SeqPoolType::kSum)
      : table_height(tbl_height),
        table_width(tbl_width),
        index_height(idx_height),
        index_width(idx_width),
        out_width(output_width),
        pool_type(seqpool_type) {}
} emb_seq_pool_attr_t;
// Type bundle for the EmbSeqPool kernel; the function signature is
// func(table, idx, out, attr).
template <typename T>
struct EmbSeqPoolTuples {
  typedef T data_type;
  typedef emb_seq_pool_attr_t attr_type;
  typedef void (*func_type)(const T*, const int64_t*, T*,
                            const emb_seq_pool_attr_t*);
};
typedef struct matmul_attr_s { typedef struct matmul_attr_s {
int m, n, k; int m, n, k;
void* packed_weight{nullptr}; void* packed_weight{nullptr};
......
...@@ -56,6 +56,11 @@ size_t JitCodeKey<matmul_attr_t>(const matmul_attr_t& attr) { ...@@ -56,6 +56,11 @@ size_t JitCodeKey<matmul_attr_t>(const matmul_attr_t& attr) {
return (key << shift * 2) + ((static_cast<size_t>(attr.n)) << shift) + attr.k; return (key << shift * 2) + ((static_cast<size_t>(attr.n)) << shift) + attr.k;
} }
template <>
size_t JitCodeKey<emb_seq_pool_attr_t>(const emb_seq_pool_attr_t& attr) {
  // The generated code depends only on the table width (the pool type is
  // currently restricted to kSum), so table_width alone keys the code cache.
  return attr.table_width;
}
} // namespace jit } // namespace jit
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -13,3 +13,4 @@ USE_JITKERNEL_MORE(kVSigmoid, mkl) ...@@ -13,3 +13,4 @@ USE_JITKERNEL_MORE(kVSigmoid, mkl)
USE_JITKERNEL_MORE(kVTanh, mkl) USE_JITKERNEL_MORE(kVTanh, mkl)
USE_JITKERNEL_MORE(kSeqPool, mkl) USE_JITKERNEL_MORE(kSeqPool, mkl)
USE_JITKERNEL_MORE(kSoftmax, mkl) USE_JITKERNEL_MORE(kSoftmax, mkl)
USE_JITKERNEL_MORE(kEmbSeqPool, mkl)
...@@ -174,6 +174,16 @@ bool SeqPoolKernel<double>::UseMe(const seq_pool_attr_t& attr) const { ...@@ -174,6 +174,16 @@ bool SeqPoolKernel<double>::UseMe(const seq_pool_attr_t& attr) const {
return true; return true;
} }
// The MKL-backed EmbSeqPool is usable for any attribute configuration.
template <>
bool EmbSeqPoolKernel<float>::UseMe(const emb_seq_pool_attr_t& attr) const {
  return true;
}

template <>
bool EmbSeqPoolKernel<double>::UseMe(const emb_seq_pool_attr_t& attr) const {
  return true;
}
template <> template <>
bool MatMulKernel<float>::UseMe(const matmul_attr_t& attr) const { bool MatMulKernel<float>::UseMe(const matmul_attr_t& attr) const {
return platform::MayIUse(platform::avx); return platform::MayIUse(platform::avx);
...@@ -227,6 +237,7 @@ REGISTER_MKL_KERNEL(kVSquare, VSquare); ...@@ -227,6 +237,7 @@ REGISTER_MKL_KERNEL(kVSquare, VSquare);
REGISTER_MKL_KERNEL(kVSigmoid, VSigmoid); REGISTER_MKL_KERNEL(kVSigmoid, VSigmoid);
REGISTER_MKL_KERNEL(kVTanh, VTanh); REGISTER_MKL_KERNEL(kVTanh, VTanh);
REGISTER_MKL_KERNEL(kSeqPool, SeqPool); REGISTER_MKL_KERNEL(kSeqPool, SeqPool);
REGISTER_MKL_KERNEL(kEmbSeqPool, EmbSeqPool);
REGISTER_MKL_KERNEL(kSoftmax, Softmax); REGISTER_MKL_KERNEL(kSoftmax, Softmax);
#undef REGISTER_MKL_KERNEL #undef REGISTER_MKL_KERNEL
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
#include "paddle/fluid/operators/jit/kernel_base.h" #include "paddle/fluid/operators/jit/kernel_base.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -91,6 +92,32 @@ void SeqPool(const T* x, T* y, const seq_pool_attr_t* attr) { ...@@ -91,6 +92,32 @@ void SeqPool(const T* x, T* y, const seq_pool_attr_t* attr) {
} }
} }
// Sum-pools embedding rows: for each index column, the output segment is the
// element-wise sum of the table rows selected by that column's index values.
// Uses VCopy for the first row and VAXPY (y += 1*x) for the rest.
template <typename T>
void EmbSeqPool(const T* table, const int64_t* idx, T* out,
                const emb_seq_pool_attr_t* attr) {
  PADDLE_ENFORCE_EQ(attr->table_width * attr->index_width, attr->out_width);
  const int64_t tbl_w = attr->table_width;
  auto assert_valid_id = [&](int64_t pos) {
    PADDLE_ENFORCE_LT(idx[pos], attr->table_height, "idx value: %d, i: %d",
                      idx[pos], pos);
    PADDLE_ENFORCE_GE(idx[pos], 0, "idx value: %d, i: %d", idx[pos], pos);
  };

  // First index row initializes the output by copying table rows.
  for (int64_t col = 0; col < attr->index_width; ++col) {
    assert_valid_id(col);
    VCopy<T>(table + idx[col] * tbl_w, out + col * tbl_w, tbl_w);
  }

  // Remaining index rows accumulate into the output.
  for (int64_t row = 1; row < attr->index_height; ++row) {
    for (int64_t col = 0; col < attr->index_width; ++col) {
      const int64_t pos = row * attr->index_width + col;
      assert_valid_id(pos);
      VAXPY<T>(static_cast<T>(1), table + idx[pos] * tbl_w, out + col * tbl_w,
               tbl_w);
    }
  }
}
template <typename T> template <typename T>
void ASum(const T* x, T* res, int n); void ASum(const T* x, T* res, int n);
...@@ -142,6 +169,8 @@ DECLARE_MKL_KERNEL(VSquare, XYNTuples); ...@@ -142,6 +169,8 @@ DECLARE_MKL_KERNEL(VSquare, XYNTuples);
DECLARE_MKL_KERNEL(SeqPool, SeqPoolTuples); DECLARE_MKL_KERNEL(SeqPool, SeqPoolTuples);
DECLARE_MKL_KERNEL(EmbSeqPool, EmbSeqPoolTuples);
DECLARE_MKL_KERNEL(Softmax, SoftmaxTuples); DECLARE_MKL_KERNEL(Softmax, SoftmaxTuples);
#undef DECLARE_MKL_KERNEL #undef DECLARE_MKL_KERNEL
......
...@@ -32,3 +32,4 @@ USE_JITKERNEL_REFER(kVSquare) ...@@ -32,3 +32,4 @@ USE_JITKERNEL_REFER(kVSquare)
USE_JITKERNEL_REFER(kHSum) USE_JITKERNEL_REFER(kHSum)
USE_JITKERNEL_REFER(kHMax) USE_JITKERNEL_REFER(kHMax)
USE_JITKERNEL_REFER(kSoftmax) USE_JITKERNEL_REFER(kSoftmax)
USE_JITKERNEL_REFER(kEmbSeqPool)
...@@ -57,4 +57,6 @@ REGISTER_REFER_KERNEL(kHSum, HSum); ...@@ -57,4 +57,6 @@ REGISTER_REFER_KERNEL(kHSum, HSum);
REGISTER_REFER_KERNEL(kSoftmax, Softmax); REGISTER_REFER_KERNEL(kSoftmax, Softmax);
REGISTER_REFER_KERNEL(kEmbSeqPool, EmbSeqPool);
#undef REGISTER_REFER_KERNEL #undef REGISTER_REFER_KERNEL
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <cmath> #include <cmath>
#include <limits> #include <limits>
#include <string>
#include "paddle/fluid/operators/jit/helper.h" #include "paddle/fluid/operators/jit/helper.h"
#include "paddle/fluid/operators/jit/kernel_base.h" #include "paddle/fluid/operators/jit/kernel_base.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
...@@ -414,6 +415,37 @@ void Softmax(const T* x, T* y, int n, int bs = 1) { ...@@ -414,6 +415,37 @@ void Softmax(const T* x, T* y, int n, int bs = 1) {
} }
} }
// embedding seq pool (reference implementation)
// table is a matrix with (tbl_h, tbl_w)
// idx is a matrix with (idx_h, idx_w)
// output is a vector with length tbl_w * idx_w
// For each index column w, out[w*tbl_w : (w+1)*tbl_w] is the element-wise
// sum over h of the table rows idx[h, w]; every index value is bounds-checked
// against [0, table_height).
template <typename T>
void EmbSeqPool(const T* table, const int64_t* idx, T* out,
                const emb_seq_pool_attr_t* attr) {
  PADDLE_ENFORCE_EQ(attr->table_width * attr->index_width, attr->out_width);
  auto check_idx_value_valid = [&](int64_t i) {
    PADDLE_ENFORCE_LT(idx[i], attr->table_height, "idx value: %d, i: %d",
                      idx[i], i);
    PADDLE_ENFORCE_GE(idx[i], 0, "idx value: %d, i: %d", idx[i], i);
  };

  // Row 0 initializes the output by copying table rows.
  for (int64_t w = 0; w != attr->index_width; ++w) {
    check_idx_value_valid(w);
    std::memcpy(out + w * attr->table_width, table + idx[w] * attr->table_width,
                attr->table_width * sizeof(T));
  }

  // Remaining rows are added element-wise into the output.
  for (int64_t h = 1; h < attr->index_height; ++h) {
    for (int64_t w = 0; w < attr->index_width; ++w) {
      int64_t i = h * attr->index_width + w;
      check_idx_value_valid(i);
      VAdd(table + idx[i] * attr->table_width, out + w * attr->table_width,
           out + w * attr->table_width, attr->table_width);
    }
  }
}
#define DECLARE_REFER_KERNEL(name, tuples) \ #define DECLARE_REFER_KERNEL(name, tuples) \
template <typename T> \ template <typename T> \
class name##Kernel : public ReferKernel<tuples<T>> { \ class name##Kernel : public ReferKernel<tuples<T>> { \
...@@ -462,6 +494,8 @@ DECLARE_REFER_KERNEL(HSum, XRNTuples); ...@@ -462,6 +494,8 @@ DECLARE_REFER_KERNEL(HSum, XRNTuples);
DECLARE_REFER_KERNEL(Softmax, SoftmaxTuples); DECLARE_REFER_KERNEL(Softmax, SoftmaxTuples);
DECLARE_REFER_KERNEL(EmbSeqPool, EmbSeqPoolTuples);
#undef DECLARE_REFER_KERNEL #undef DECLARE_REFER_KERNEL
} // namespace refer } // namespace refer
......
...@@ -270,6 +270,32 @@ struct TestFuncWithRefer<jit::SeqPoolTuples<T>, std::vector<T>, std::vector<T>, ...@@ -270,6 +270,32 @@ struct TestFuncWithRefer<jit::SeqPoolTuples<T>, std::vector<T>, std::vector<T>,
} }
}; };
// Runs the kernel under test on (table, idx) and compares its output against
// the reference result oref, after validating that all buffer sizes agree
// with the attribute description.
template <typename T>
struct TestFuncWithRefer<jit::EmbSeqPoolTuples<T>, std::vector<T>,
                         std::vector<int64_t>, std::vector<T>,
                         typename jit::EmbSeqPoolTuples<T>::attr_type> {
  void operator()(const typename jit::EmbSeqPoolTuples<T>::func_type tgt,
                  const std::vector<T>& table, const std::vector<int64_t>& idx,
                  const std::vector<T>& oref,
                  const typename jit::EmbSeqPoolTuples<T>::attr_type& attr) {
    EXPECT_TRUE(tgt != nullptr);
    EXPECT_EQ(table.size(),
              static_cast<size_t>(attr.table_height * attr.table_width));
    EXPECT_EQ(idx.size(),
              static_cast<size_t>(attr.index_height * attr.index_width));
    EXPECT_EQ(oref.size(),
              static_cast<size_t>(attr.table_width * attr.index_width));
    const int out_size = oref.size();
    std::vector<T> result(out_size);
    tgt(table.data(), idx.data(), result.data(), &attr);
    ExpectEQ<T>(result.data(), oref.data(), out_size);
  }
};
template <typename T> template <typename T>
struct TestFuncWithRefer<jit::MatMulTuples<T>, std::vector<T>, std::vector<T>, struct TestFuncWithRefer<jit::MatMulTuples<T>, std::vector<T>, std::vector<T>,
std::vector<T>, std::vector<T>,
...@@ -644,6 +670,40 @@ void TestSoftmaxKernel() { ...@@ -644,6 +670,40 @@ void TestSoftmaxKernel() {
} }
} }
template <jit::KernelType KT, typename T, typename PlaceType>
void TestEmbSeqPoolKernel() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
int64_t tbl_h = 1e4;
std::vector<jit::SeqPoolType> pool_types = {
jit::SeqPoolType::kSum}; // only support sum yet
for (int tbl_w : TestSizes()) {
std::vector<T> table(tbl_h * tbl_w);
RandomVec<T>(tbl_h * tbl_w, table.data(), -2.f, 2.f);
const T* table_data = table.data();
for (auto type : pool_types) {
for (int idx_w : {1, 2, 10, 16}) {
for (int idx_h : {1, 2, 9, 13, 16}) {
auto ref = jit::GetRefer<KT, jit::EmbSeqPoolTuples<T>>();
EXPECT_TRUE(ref != nullptr);
std::vector<int64_t> idx(idx_h * idx_w);
RandomVec<int64_t>(idx_h * idx_w, idx.data(), 0, tbl_h - 1);
int64_t out_w = tbl_w * idx_w;
std::vector<T> oref(out_w);
const int64_t* idx_data = idx.data();
T* o_data = oref.data();
jit::emb_seq_pool_attr_t attr(tbl_h, tbl_w, idx_h, idx_w, out_w,
type);
ref(table_data, idx_data, o_data, &attr);
TestAllImpls<KT, jit::EmbSeqPoolTuples<T>, PlaceType, std::vector<T>,
std::vector<int64_t>, std::vector<T>>(attr, table, idx,
oref, attr);
}
}
}
}
}
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestNCHW16CMulNCKernel() { void TestNCHW16CMulNCKernel() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
...@@ -878,6 +938,11 @@ TEST(JITKernel, kSoftmax) { ...@@ -878,6 +938,11 @@ TEST(JITKernel, kSoftmax) {
TestSoftmaxKernel<jit::kSoftmax, double, CPUPlace>(); TestSoftmaxKernel<jit::kSoftmax, double, CPUPlace>();
} }
// Exercise the EmbSeqPool kernel for both float and double on CPU.
TEST(JITKernel, kEmbSeqPool) {
  TestEmbSeqPoolKernel<jit::kEmbSeqPool, float, CPUPlace>();
  TestEmbSeqPoolKernel<jit::kEmbSeqPool, double, CPUPlace>();
}
TEST(JITKernel, kNCHW16CMulNC) { TEST(JITKernel, kNCHW16CMulNC) {
TestNCHW16CMulNCKernel<jit::kNCHW16CMulNC, float, CPUPlace>(); TestNCHW16CMulNCKernel<jit::kNCHW16CMulNC, float, CPUPlace>();
TestNCHW16CMulNCKernel<jit::kNCHW16CMulNC, double, CPUPlace>(); TestNCHW16CMulNCKernel<jit::kNCHW16CMulNC, double, CPUPlace>();
......
...@@ -36,6 +36,8 @@ std::map<std::string, ...@@ -36,6 +36,8 @@ std::map<std::string,
{"conv2d_grad", NG_OPS::BuildConv2dGradNode}, {"conv2d_grad", NG_OPS::BuildConv2dGradNode},
{"batch_norm", NG_OPS::BuildBatchNormNode}, {"batch_norm", NG_OPS::BuildBatchNormNode},
{"batch_norm_grad", NG_OPS::BuildBatchNormGradNode}, {"batch_norm_grad", NG_OPS::BuildBatchNormGradNode},
{"cross_entropy", NG_OPS::BuildCrossEntropyNode},
{"cross_entropy_grad", NG_OPS::BuildCrossEntropyGradNode},
{"elementwise_add", NG_OPS::BuildElementwiseAddNode}, {"elementwise_add", NG_OPS::BuildElementwiseAddNode},
{"elementwise_add_grad", NG_OPS::BuildElementwiseAddGradNode}, {"elementwise_add_grad", NG_OPS::BuildElementwiseAddGradNode},
{"fill_constant", NG_OPS::BuildFillConstantNode}, {"fill_constant", NG_OPS::BuildFillConstantNode},
......
...@@ -26,6 +26,7 @@ limitations under the License. */ ...@@ -26,6 +26,7 @@ limitations under the License. */
#include "ops/batch_norm_op.h" #include "ops/batch_norm_op.h"
#include "ops/binary_unary_op.h" #include "ops/binary_unary_op.h"
#include "ops/conv2d_op.h" #include "ops/conv2d_op.h"
#include "ops/cross_entropy_op.h"
#include "ops/elementwise_add_op.h" #include "ops/elementwise_add_op.h"
#include "ops/fill_constant_op.h" #include "ops/fill_constant_op.h"
#include "ops/mean_op.h" #include "ops/mean_op.h"
......
...@@ -44,6 +44,10 @@ void BuildBatchNormNode( ...@@ -44,6 +44,10 @@ void BuildBatchNormNode(
const float epsilon = op_attrs.Get<float>("epsilon"); const float epsilon = op_attrs.Get<float>("epsilon");
const float momentum = op_attrs.Get<float>("momentum"); const float momentum = op_attrs.Get<float>("momentum");
PADDLE_ENFORCE(
data_layout == "NHWC" || data_layout == "NCHW" || data_layout == "NC",
"The BatchNorm operator only supports NHWC/NCHW/NC data format");
if (data_layout == "NHWC") { if (data_layout == "NHWC") {
x = paddle::platform::Nhwc2Nchw(x); x = paddle::platform::Nhwc2Nchw(x);
} }
...@@ -110,6 +114,9 @@ void BuildBatchNormGradNode( ...@@ -110,6 +114,9 @@ void BuildBatchNormGradNode(
"BN grap input size needs to be 2 or 4"); "BN grap input size needs to be 2 or 4");
PADDLE_ENFORCE_EQ(x_shape.size(), dy_shape.size(), PADDLE_ENFORCE_EQ(x_shape.size(), dy_shape.size(),
"BN grap input and delta size needs to be equal"); "BN grap input and delta size needs to be equal");
PADDLE_ENFORCE(
data_layout == "NHWC" || data_layout == "NCHW" || data_layout == "NC",
"The BatchNorm operator only supports NHWC/NCHW/NC data format");
if (x_shape.size() == 2) { if (x_shape.size() == 2) {
x = std::make_shared<ngraph::op::Reshape>( x = std::make_shared<ngraph::op::Reshape>(
......
/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include <string>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace paddle {
namespace operators {
namespace ngraphs {
void BuildCrossEntropyNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  // Builds the nGraph subgraph for the forward cross_entropy op:
  //   Y = -sum(Label * log(X), axis=1)
  // handling both soft labels (probability rows) and hard labels
  // (class indices, expanded to one-hot), plus ignore_index masking.
  auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map);
  auto label = paddle::platform::GetInputNode(op, "Label", ngb_node_map);
  auto label_shape = label->get_shape();
  auto x_shape = x->get_shape();
  auto label_rank = label_shape.size();
  auto x_rank = x_shape.size();
  std::shared_ptr<ngraph::Node> x_2d = x, label_2d = label;
  auto label_2d_shape = label_shape, x_2d_shape = x_shape;
  // Flatten any input of rank > 2 down to 2-D (batch x class) so the
  // rest of the graph only deals with one layout.
  if (label_rank > 2) {
    label_2d_shape = paddle::platform::FlattenTo2d(label_shape, label_rank - 1);
    label_2d = paddle::platform::NgReshaper(label, label_2d_shape);
  }
  if (x_rank > 2) {
    x_2d_shape = paddle::platform::FlattenTo2d(x_shape, x_rank - 1);
    x_2d = paddle::platform::NgReshaper(x, x_2d_shape);
  }
  auto batch_size = x_2d_shape.at(0);
  auto op_attrs = paddle::framework::AttrReader(op->Attrs());
  const bool is_soft_label = op_attrs.Get<bool>("soft_label");
  std::shared_ptr<ngraph::Node> node_1_hot = label_2d;
  // Hard labels arrive as class indices; turn them into one-hot rows so
  // the soft- and hard-label paths share the same formula below.
  if (!is_soft_label) {
    auto label_1d = paddle::platform::NgReshaper(
        label_2d, ngraph::Shape{label_2d_shape.at(0)});
    node_1_hot = std::make_shared<ngraph::op::OneHot>(label_1d, x_2d_shape, 1);
  }
  if (x->get_element_type() != node_1_hot->get_element_type()) {
    node_1_hot = std::make_shared<ngraph::op::Convert>(node_1_hot,
                                                       x->get_element_type());
  }
  auto node_log = std::make_shared<ngraph::op::Log>(x_2d);
  // Clamp log(x) into [-1e20, 1e20] so x == 0 does not inject -inf
  // (and 0 * -inf = NaN) into the loss.
  auto high_clip = ngraph::op::Constant::create(node_log->get_element_type(),
                                                node_log->get_shape(), {1e20});
  auto low_clip = ngraph::op::Constant::create(node_log->get_element_type(),
                                               node_log->get_shape(), {-1e20});
  auto node_min = std::make_shared<ngraph::op::Minimum>(node_log, high_clip);
  auto node_max = std::make_shared<ngraph::op::Maximum>(node_min, low_clip);
  // BUGFIX: multiply by the clamped log (node_max). The original used the
  // raw node_log here, which left the clip nodes above dead code and let
  // -inf propagate into the loss for zero probabilities.
  auto node_mul = node_1_hot * node_max;
  auto node_sum =
      std::make_shared<ngraph::op::Sum>(node_mul, ngraph::AxisSet{1});
  auto node_neg = std::make_shared<ngraph::op::Negative>(node_sum);
  auto xe =
      paddle::platform::NgReshaper(node_neg, ngraph::Shape{batch_size, 1});
  // Rows whose hard label equals ignore_index must contribute zero loss.
  if (!is_soft_label) {
    auto ignore_index = op_attrs.Get<int>("ignore_index");
    auto ignore_node = ngraph::op::Constant::create(
        label->get_element_type(), label_2d_shape, {ignore_index});
    auto not_equal_node =
        std::make_shared<ngraph::op::NotEqual>(label_2d, ignore_node);
    auto mask = std::make_shared<ngraph::op::Convert>(not_equal_node,
                                                      xe->get_element_type());
    xe = xe * mask;
  }
  paddle::platform::SetOutputNode(op, "Y", xe, ngb_node_map);
}
void BuildCrossEntropyGradNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  // Builds the nGraph subgraph for the cross_entropy gradient:
  //   dX = -Label * dY / X
  // with ignore_index masking in the hard-label case.
  auto attrs = paddle::framework::AttrReader(op->Attrs());
  const bool soft_label = attrs.Get<bool>("soft_label");
  auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map);
  auto label = paddle::platform::GetInputNode(op, "Label", ngb_node_map);
  auto dy = paddle::platform::GetInputNode(op, "Y@GRAD", ngb_node_map);
  auto x_shape = x->get_shape();
  auto rank = x_shape.size();

  std::shared_ptr<ngraph::Node> mask;
  if (!soft_label) {
    // Drop the trailing singleton dim so the label holds bare class indices.
    auto label_shape = label->get_shape();
    label_shape.pop_back();
    label = paddle::platform::NgReshaper(label, label_shape);
    // mask = broadcast(label != ignore_index): zeroes gradients of rows
    // whose label equals ignore_index.
    auto ignore_index = attrs.Get<int>("ignore_index");
    auto ignore_node = ngraph::op::Constant::create(
        label->get_element_type(), label_shape, {ignore_index});
    auto keep = std::make_shared<ngraph::op::NotEqual>(label, ignore_node);
    mask = std::make_shared<ngraph::op::Convert>(keep, x->get_element_type());
    mask = std::make_shared<ngraph::op::Broadcast>(mask, x_shape,
                                                   ngraph::AxisSet{rank - 1});
    // Expand index labels to one-hot so both label modes share one formula.
    label = std::make_shared<ngraph::op::OneHot>(label, x_shape, rank - 1);
  }
  // dY has shape [..., 1]; strip that dim and broadcast it across classes.
  auto dy_shape = dy->get_shape();
  dy_shape.pop_back();
  auto dy_squeezed = paddle::platform::NgReshaper(dy, dy_shape);
  auto dy_bcast = std::make_shared<ngraph::op::Broadcast>(
      dy_squeezed, x_shape, ngraph::AxisSet{rank - 1});
  if (x->get_element_type() != label->get_element_type()) {
    label = std::make_shared<ngraph::op::Convert>(label, x->get_element_type());
  }
  auto xe_grad = -label * dy_bcast / x;
  if (!soft_label) {
    xe_grad = xe_grad * mask;
  }
  paddle::platform::SetOutputNode(op, "X@GRAD", xe_grad, ngb_node_map);
}
} // namespace ngraphs
} // namespace operators
} // namespace paddle
...@@ -46,8 +46,6 @@ void BuildFillConstantNode( ...@@ -46,8 +46,6 @@ void BuildFillConstantNode(
ng_dtype = ngraph::element::i64; ng_dtype = ngraph::element::i64;
} else if (data_type == paddle::framework::proto::VarType::INT32) { } else if (data_type == paddle::framework::proto::VarType::INT32) {
ng_dtype = ngraph::element::i32; ng_dtype = ngraph::element::i32;
} else if (data_type == paddle::framework::proto::VarType::BOOL) {
ng_dtype = ngraph::element::boolean;
} else { } else {
PADDLE_THROW("unsupported data type: %s", data_type); PADDLE_THROW("unsupported data type: %s", data_type);
} }
......
...@@ -373,7 +373,13 @@ PYBIND11_MODULE(core, m) { ...@@ -373,7 +373,13 @@ PYBIND11_MODULE(core, m) {
PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()), PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()),
"the provided lod info is invalid"); "the provided lod info is invalid");
self.set_lod(new_lod); self.set_lod(new_lod);
}) },
py::arg("lod"), R"DOC(
Set LoD of the LoDTensor.
Args:
lod (List[List[int]]): the lod to be set.
)DOC")
.def("set_recursive_sequence_lengths", .def("set_recursive_sequence_lengths",
[](LoDTensor &self, const std::vector<std::vector<size_t>> [](LoDTensor &self, const std::vector<std::vector<size_t>>
&recursive_sequence_lengths) { &recursive_sequence_lengths) {
...@@ -389,7 +395,17 @@ PYBIND11_MODULE(core, m) { ...@@ -389,7 +395,17 @@ PYBIND11_MODULE(core, m) {
CheckLoD(new_offset_lod, vectorize(self.dims()).front()), CheckLoD(new_offset_lod, vectorize(self.dims()).front()),
"the provided recursive_sequence_lengths info is invalid"); "the provided recursive_sequence_lengths info is invalid");
self.set_lod(new_offset_lod); self.set_lod(new_offset_lod);
}) },
py::arg("recursive_sequence_lengths"), R"DOC(
Set LoD of the LoDTensor according to recursive sequence length.
For example, if recursive_sequence_lengths=[[2, 3]], meaning that
there are two sequences with length 2 and 3 respectively, the
corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]].
Args:
recursive_sequence_lengths (List[List[int]]): sequence lengths.
)DOC")
.def("lod", .def("lod",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> { [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
// output the offset-based lod info // output the offset-based lod info
...@@ -398,7 +414,13 @@ PYBIND11_MODULE(core, m) { ...@@ -398,7 +414,13 @@ PYBIND11_MODULE(core, m) {
new_lod.reserve(lod.size()); new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return new_lod; return new_lod;
}) },
R"DOC(
Return the LoD of the LoDTensor.
Returns:
out (List[List[int]]): the lod of the LoDTensor.
)DOC")
// Set above comments of set_lod. // Set above comments of set_lod.
.def("recursive_sequence_lengths", .def("recursive_sequence_lengths",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> { [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
...@@ -408,12 +430,25 @@ PYBIND11_MODULE(core, m) { ...@@ -408,12 +430,25 @@ PYBIND11_MODULE(core, m) {
new_lod.reserve(lod.size()); new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return new_lod; return new_lod;
}) },
.def("has_valid_recursive_sequence_lengths", [](LoDTensor &self) -> bool { R"DOC(
// Check that the lod info is valid and match the outermost Return the sequence length of the LoDTensor corresponding to LoD.
// dimension of the LoDTensor data
return CheckLoD(self.lod(), vectorize(self.dims()).front()); Returns:
}); out (List[List[int]): the sequence lengths.
)DOC")
.def("has_valid_recursive_sequence_lengths",
[](LoDTensor &self) -> bool {
// Check that the lod info is valid and match the outermost
// dimension of the LoDTensor data
return CheckLoD(self.lod(), vectorize(self.dims()).front());
},
R"DOC(
Check whether the lod of the LoDTensor is valid.
Returns:
out (bool): whether the lod is valid.
)DOC");
py::class_<SelectedRows>(m, "SelectedRows") py::class_<SelectedRows>(m, "SelectedRows")
.def("__init__", .def("__init__",
...@@ -549,11 +584,45 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -549,11 +584,45 @@ All parameter, weight, gradient are variables in Paddle.
[](Scope &self, const std::string &name) -> Variable * { [](Scope &self, const std::string &name) -> Variable * {
return self.Var(name); return self.Var(name);
}, },
py::arg("name"),
R"DOC(
Find or create variable named :code:`name` in the current scope.
If the variable named :code:`name` does not exist in the
current scope, the variable would be created. Otherwise,
return the existing variable.
Args:
name (str): the variable name.
Returns:
out (core.Variable): the found or created variable.
)DOC",
py::return_value_policy::reference)
.def("find_var", &Scope::FindVar, py::arg("name"),
R"DOC(
Find variable named :code:`name` in the current scope or
its parent scope. Return None if not found.
Args:
name (str): the variable name.
Returns:
out (core.Variable|None): the found variable or None.
)DOC",
py::return_value_policy::reference) py::return_value_policy::reference)
.def("find_var", &Scope::FindVar, py::return_value_policy::reference)
.def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); }, .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); },
R"DOC(
Create a new sub-scope of the current scope.
Returns:
out (core._Scope): the created sub-scope.
)DOC",
py::return_value_policy::reference) py::return_value_policy::reference)
.def("drop_kids", &Scope::DropKids); .def("drop_kids", &Scope::DropKids,
R"DOC(
Delete all sub-scopes of the current scope.
)DOC");
m.def("Scope", m.def("Scope",
[]() -> Scope * { []() -> Scope * {
...@@ -561,6 +630,12 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -561,6 +630,12 @@ All parameter, weight, gradient are variables in Paddle.
ScopePool::Instance().Insert(std::unique_ptr<Scope>(s)); ScopePool::Instance().Insert(std::unique_ptr<Scope>(s));
return s; return s;
}, },
R"DOC(
Create a new scope.
Returns:
out (core._Scope): the created scope.
)DOC",
py::return_value_policy::reference); py::return_value_policy::reference);
//! @note: Be careful! PyBind will return std::string as an unicode, not //! @note: Be careful! PyBind will return std::string as an unicode, not
...@@ -789,11 +864,13 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -789,11 +864,13 @@ All parameter, weight, gradient are variables in Paddle.
self[i].ShareDataWith(t); self[i].ShareDataWith(t);
self[i].set_lod(t.lod()); self[i].set_lod(t.lod());
}) })
.def("append", [](LoDTensorArray &self, const LoDTensor &t) { .def("append",
self.emplace_back(); [](LoDTensorArray &self, const LoDTensor &t) {
self.back().ShareDataWith(t); self.emplace_back();
self.back().set_lod(t.lod()); self.back().ShareDataWith(t);
}); self.back().set_lod(t.lod());
},
py::arg("tensor"), "Append a LoDensor to LoDTensorArray.");
m.def("IsInplace", m.def("IsInplace",
[](std::string op) -> bool { return operators::IsInplace(op); }); [](std::string op) -> bool { return operators::IsInplace(op); });
......
...@@ -88,6 +88,7 @@ function cmake_gen() { ...@@ -88,6 +88,7 @@ function cmake_gen() {
-DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5m/ -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5m/
-DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/lib/libpython3.5m.dylib" -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/lib/libpython3.5m.dylib"
WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON}
pip3.5 uninstall -y protobuf
pip3.5 install --user -r ${PADDLE_ROOT}/python/requirements.txt pip3.5 install --user -r ${PADDLE_ROOT}/python/requirements.txt
else else
exit 1 exit 1
...@@ -101,6 +102,7 @@ function cmake_gen() { ...@@ -101,6 +102,7 @@ function cmake_gen() {
-DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.6/include/python3.6m/ -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.6/include/python3.6m/
-DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.6/lib/libpython3.6m.dylib" -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.6/lib/libpython3.6m.dylib"
WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON}
pip3.6 uninstall -y protobuf
pip3.6 install --user -r ${PADDLE_ROOT}/python/requirements.txt pip3.6 install --user -r ${PADDLE_ROOT}/python/requirements.txt
else else
exit 1 exit 1
...@@ -114,6 +116,7 @@ function cmake_gen() { ...@@ -114,6 +116,7 @@ function cmake_gen() {
-DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m/ -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m/
-DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib" -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib"
WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON}
pip3.7 uninstall -y protobuf
pip3.7 install --user -r ${PADDLE_ROOT}/python/requirements.txt pip3.7 install --user -r ${PADDLE_ROOT}/python/requirements.txt
else else
exit 1 exit 1
...@@ -128,31 +131,44 @@ function cmake_gen() { ...@@ -128,31 +131,44 @@ function cmake_gen() {
PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7 -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so" -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so"
pip uninstall -y protobuf
pip install -r ${PADDLE_ROOT}/python/requirements.txt
elif [ "$1" == "cp27-cp27mu" ]; then elif [ "$1" == "cp27-cp27mu" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:} export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:}
export PATH=/opt/python/cp27-cp27mu/bin/:${PATH} export PATH=/opt/python/cp27-cp27mu/bin/:${PATH}
PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7 -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so" -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so"
pip uninstall -y protobuf
pip install -r ${PADDLE_ROOT}/python/requirements.txt
elif [ "$1" == "cp35-cp35m" ]; then elif [ "$1" == "cp35-cp35m" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} export LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH}
export PATH=/opt/_internal/cpython-3.5.1/bin/:${PATH} export PATH=/opt/_internal/cpython-3.5.1/bin/:${PATH}
export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.5.1/bin/python3 export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.5.1/bin/python3
-DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.5.1/include/python3.5m -DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.5.1/include/python3.5m
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.5.1/lib/libpython3.so" -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.5.1/lib/libpython3.so"
pip3.5 uninstall -y protobuf
pip3.5 install -r ${PADDLE_ROOT}/python/requirements.txt
elif [ "$1" == "cp36-cp36m" ]; then elif [ "$1" == "cp36-cp36m" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-3.6.0/lib/:${LD_LIBRARY_PATH} export LD_LIBRARY_PATH=/opt/_internal/cpython-3.6.0/lib/:${LD_LIBRARY_PATH}
export PATH=/opt/_internal/cpython-3.6.0/bin/:${PATH} export PATH=/opt/_internal/cpython-3.6.0/bin/:${PATH}
export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.6.0/bin/python3 export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.6.0/bin/python3
-DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.6.0/include/python3.6m -DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.6.0/include/python3.6m
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.6.0/lib/libpython3.so" -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.6.0/lib/libpython3.so"
pip3.6 uninstall -y protobuf
pip3.6 install -r ${PADDLE_ROOT}/python/requirements.txt
elif [ "$1" == "cp37-cp37m" ]; then elif [ "$1" == "cp37-cp37m" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} export LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH}
export PATH=/opt/_internal/cpython-3.7.0/bin/:${PATH} export PATH=/opt/_internal/cpython-3.7.0/bin/:${PATH}
export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.7.0/bin/python3.7 export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.7.0/bin/python3.7
-DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.7.0/include/python3.7m -DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.7.0/include/python3.7m
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.7.0/lib/libpython3.so" -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.7.0/lib/libpython3.so"
pip3.7 uninstall -y protobuf
pip3.7 install -r ${PADDLE_ROOT}/python/requirements.txt
fi fi
else
pip uninstall -y protobuf
pip install -r ${PADDLE_ROOT}/python/requirements.txt
fi fi
fi fi
......
...@@ -177,7 +177,10 @@ class CompiledProgram(object): ...@@ -177,7 +177,10 @@ class CompiledProgram(object):
# FIXME(dzhwinter): enable_inplace should be after memory_optimize # FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass. # if turn on python memory optimize, turn off the inplace_pass.
self._build_strategy.enable_inplace = False if self._program._is_mem_optimized else True if self._build_strategy.memory_optimize is None:
self._build_strategy.memory_optimize = False if main._is_mem_optimized else True
if self._build_strategy.enable_inplace is None:
self._build_strategy.enable_inplace = False if main._is_mem_optimized else True
if self._build_strategy.num_trainers > 1 and trainers_endpoints: if self._build_strategy.num_trainers > 1 and trainers_endpoints:
assert self._build_strategy.num_trainers == len( assert self._build_strategy.num_trainers == len(
......
...@@ -557,7 +557,8 @@ class OpProtoHolder(object): ...@@ -557,7 +557,8 @@ class OpProtoHolder(object):
return { return {
core.op_proto_and_checker_maker.kOpRoleAttrName(), core.op_proto_and_checker_maker.kOpRoleAttrName(),
core.op_proto_and_checker_maker.kOpRoleVarAttrName(), core.op_proto_and_checker_maker.kOpRoleVarAttrName(),
core.op_proto_and_checker_maker.kOpNameScopeAttrName() core.op_proto_and_checker_maker.kOpNameScopeAttrName(),
core.op_proto_and_checker_maker.kOpCreationCallstackAttrName()
} }
......
...@@ -506,9 +506,9 @@ class While(object): ...@@ -506,9 +506,9 @@ class While(object):
while loop control flow. while loop control flow.
Args: Args:
cond (Variable): condition used to compare. cond(Variable): condition used to compare.
is_test(bool): A flag indicating whether execution is in test phase. is_test(bool): A flag indicating whether execution is in test phase.
name (str): The name of this layer. name(str): The name of this layer.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -589,7 +589,8 @@ class While(object): ...@@ -589,7 +589,8 @@ class While(object):
def lod_rank_table(x, level=0): def lod_rank_table(x, level=0):
"""LoD Rank Table Operator. Given an input variable **x** and a level number """
LoD Rank Table Operator. Given an input variable **x** and a level number
of LoD, this layer creates a LodRankTable object. A LoDRankTable object of LoD, this layer creates a LodRankTable object. A LoDRankTable object
contains a list of bi-element tuples. Each tuple consists of an index and contains a list of bi-element tuples. Each tuple consists of an index and
a length, both of which are int type. Refering to specified level of LoD, a length, both of which are int type. Refering to specified level of LoD,
...@@ -883,10 +884,8 @@ def less_than(x, y, force_cpu=None, cond=None, **ignored): ...@@ -883,10 +884,8 @@ def less_than(x, y, force_cpu=None, cond=None, **ignored):
return cond return cond
def equal(x, y, cond=None, **ignored): def equal(x, y, cond=None):
""" """
**equal**
This layer returns the truth value of :math:`x == y` elementwise. This layer returns the truth value of :math:`x == y` elementwise.
Args: Args:
...@@ -1458,7 +1457,6 @@ class DynamicRNN(object): ...@@ -1458,7 +1457,6 @@ class DynamicRNN(object):
Returns: Returns:
The current timestep in the input sequence. The current timestep in the input sequence.
""" """
self._assert_in_rnn_block_("step_input") self._assert_in_rnn_block_("step_input")
if not isinstance(x, Variable): if not isinstance(x, Variable):
...@@ -1535,8 +1533,7 @@ class DynamicRNN(object): ...@@ -1535,8 +1533,7 @@ class DynamicRNN(object):
@signature_safe_contextmanager @signature_safe_contextmanager
def block(self): def block(self):
""" """
The block for user to define operators in RNN. See the class docstring The block for user to define operators in RNN.
for more details.
""" """
if self.status != DynamicRNN.BEFORE_RNN: if self.status != DynamicRNN.BEFORE_RNN:
raise ValueError("rnn.block() can only be invoke once") raise ValueError("rnn.block() can only be invoke once")
...@@ -1640,8 +1637,7 @@ class DynamicRNN(object): ...@@ -1640,8 +1637,7 @@ class DynamicRNN(object):
dtype(str|numpy.dtype): The data type of the initialized memory. dtype(str|numpy.dtype): The data type of the initialized memory.
Returns: Returns:
the memory variable. The memory variable.
""" """
self._assert_in_rnn_block_('memory') self._assert_in_rnn_block_('memory')
self._init_zero_idx_() self._init_zero_idx_()
...@@ -1740,7 +1736,7 @@ class DynamicRNN(object): ...@@ -1740,7 +1736,7 @@ class DynamicRNN(object):
def output(self, *outputs): def output(self, *outputs):
""" """
mark the RNN output variables. Mark the RNN output variables.
Args: Args:
outputs: The output variables. outputs: The output variables.
......
...@@ -56,7 +56,10 @@ def data(name, ...@@ -56,7 +56,10 @@ def data(name,
Args: Args:
name(str): The name/alias of the function name(str): The name/alias of the function
shape(list): Tuple declaring the shape. shape(list): Tuple declaring the shape. If :code:`append_batch_size` is
True and there is no -1 inside :code:`shape`, it should be
considered as the shape of the each sample. Otherwise, it
should be considered as the shape of the batched data.
append_batch_size(bool): append_batch_size(bool):
1. If true, it prepends -1 to the shape. 1. If true, it prepends -1 to the shape.
For example if shape=[1], the resulting shape is [-1, 1]. For example if shape=[1], the resulting shape is [-1, 1].
......
...@@ -24,7 +24,7 @@ from ..framework import OpProtoHolder, Variable, core, convert_np_dtype_to_dtype ...@@ -24,7 +24,7 @@ from ..framework import OpProtoHolder, Variable, core, convert_np_dtype_to_dtype
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
__all__ = [ __all__ = [
'deprecated', 'generate_layer_fn', 'generate_layer_fn_noattr', 'autodoc', 'deprecated', 'generate_layer_fn', 'generate_activation_fn', 'autodoc',
'templatedoc' 'templatedoc'
] ]
...@@ -89,6 +89,9 @@ def _generate_doc_string_(op_proto, additional_args_lines=None): ...@@ -89,6 +89,9 @@ def _generate_doc_string_(op_proto, additional_args_lines=None):
buf.write('\n') buf.write('\n')
skip_attrs = OpProtoHolder.generated_op_attr_names() skip_attrs = OpProtoHolder.generated_op_attr_names()
# attr use_mkldnn and is_test also should not be visible to users.
skip_attrs.add("use_mkldnn")
skip_attrs.add("is_test")
for each_attr in op_proto.attrs: for each_attr in op_proto.attrs:
if each_attr.name in skip_attrs: if each_attr.name in skip_attrs:
...@@ -226,7 +229,7 @@ def generate_layer_fn(op_type): ...@@ -226,7 +229,7 @@ def generate_layer_fn(op_type):
return func return func
def generate_layer_fn_noattr(op_type): def generate_activation_fn(op_type):
"""Register the Python layer for an Operator without Attribute. """Register the Python layer for an Operator without Attribute.
Args: Args:
...@@ -246,6 +249,7 @@ def generate_layer_fn_noattr(op_type): ...@@ -246,6 +249,7 @@ def generate_layer_fn_noattr(op_type):
func.__name__ = op_type func.__name__ = op_type
func.__doc__ = _generate_doc_string_(op_proto) func.__doc__ = _generate_doc_string_(op_proto)
return func return func
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
from __future__ import print_function from __future__ import print_function
import os import os
from .layer_function_generator import generate_layer_fn, generate_layer_fn_noattr from .layer_function_generator import generate_layer_fn, generate_activation_fn
from .. import core from .. import core
from ..framework import convert_np_dtype_to_dtype_ from ..framework import convert_np_dtype_to_dtype_
...@@ -53,7 +53,7 @@ globals()['_elementwise_div'] = generate_layer_fn('elementwise_div') ...@@ -53,7 +53,7 @@ globals()['_elementwise_div'] = generate_layer_fn('elementwise_div')
__all__ += __activations_noattr__ __all__ += __activations_noattr__
for _OP in set(__activations_noattr__): for _OP in set(__activations_noattr__):
globals()[_OP] = generate_layer_fn_noattr(_OP) globals()[_OP] = generate_activation_fn(_OP)
__all__ += ["uniform_random"] __all__ += ["uniform_random"]
......
...@@ -1368,9 +1368,9 @@ class FtrlOptimizer(Optimizer): ...@@ -1368,9 +1368,9 @@ class FtrlOptimizer(Optimizer):
Args: Args:
learning_rate (float|Variable): global learning rate. learning_rate (float|Variable): global learning rate.
l1 (float): l1 (float): L1 regularization strength.
l2 (float): l2 (float): L2 regularization strength.
lr_power (float): lr_power (float): Learning Rate Power.
regularization: A Regularizer, such as regularization: A Regularizer, such as
fluid.regularizer.L2DecayRegularizer. fluid.regularizer.L2DecayRegularizer.
name: A optional name prefix. name: A optional name prefix.
......
...@@ -148,6 +148,8 @@ class ParallelExecutor(object): ...@@ -148,6 +148,8 @@ class ParallelExecutor(object):
else framework.default_main_program() else framework.default_main_program()
# FIXME(dzhwinter): enable_inplace should be after memory_optimize # FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass. # if turn on python memory optimize, turn off the inplace_pass.
if build_strategy.memory_optimize is None:
build_strategy.memory_optimize = False if main._is_mem_optimized else True
if build_strategy.enable_inplace is None: if build_strategy.enable_inplace is None:
build_strategy.enable_inplace = False if main._is_mem_optimized else True build_strategy.enable_inplace = False if main._is_mem_optimized else True
scope = scope if scope is not None else executor.global_scope() scope = scope if scope is not None else executor.global_scope()
......
...@@ -77,6 +77,7 @@ list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op) ...@@ -77,6 +77,7 @@ list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op)
list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) list(REMOVE_ITEM TEST_OPS test_nearest_interp_op)
list(REMOVE_ITEM TEST_OPS test_imperative_resnet) list(REMOVE_ITEM TEST_OPS test_imperative_resnet)
list(REMOVE_ITEM TEST_OPS test_imperative_optimizer) list(REMOVE_ITEM TEST_OPS test_imperative_optimizer)
list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer)
foreach(TEST_OP ${TEST_OPS}) foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP}) py_test_modules(${TEST_OP} MODULES ${TEST_OP})
endforeach(TEST_OP) endforeach(TEST_OP)
...@@ -107,6 +108,9 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE ...@@ -107,6 +108,9 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE
py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL) py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL)
set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450) set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450)
py_test_modules(test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL) py_test_modules(test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL)
if(NOT WIN32)
py_test_modules(test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL)
endif()
if(NOT APPLE) if(NOT APPLE)
py_test_modules(test_image_classification_resnet MODULES test_image_classification_resnet SERIAL) py_test_modules(test_image_classification_resnet MODULES test_image_classification_resnet SERIAL)
if(CMAKE_BUILD_TYPE STREQUAL "Debug") if(CMAKE_BUILD_TYPE STREQUAL "Debug")
......
...@@ -15,39 +15,7 @@ ...@@ -15,39 +15,7 @@
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import numpy as np from paddle.fluid.tests.unittests.test_accuracy_op import TestAccuracyOp
from paddle.fluid.tests.unittests.op_test import OpTest
class TestNGRAPHAccuracyOp(OpTest):
    """Exercises the accuracy op through the OpTest harness (CPU only)."""

    def setUp(self):
        self.op_type = "accuracy"
        self.dtype = np.float32
        self.init_dtype()
        n = 128
        infer = np.random.random((n, 1)).astype(self.dtype)
        indices = np.random.randint(0, 2, (n, 1))
        label = np.random.randint(0, 2, (n, 1))
        self.inputs = {'Out': infer, 'Indices': indices, "Label": label}
        # A row counts as correct when any of its predicted indices equals
        # the ground-truth label for that row.
        num_correct = int(np.sum(np.any(indices == label, axis=1)))
        self.outputs = {
            'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype),
            'Correct': np.array([num_correct]).astype("int64"),
            'Total': np.array([n]).astype("int64")
        }
        # nGraph kernels exist only for CPU; skip GPU checks.
        self._cpu_only = True

    def init_dtype(self):
        # Hook for subclasses to change the dtype; default keeps float32.
        pass

    def test_check_output(self):
        self.check_output()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -17,21 +17,5 @@ from __future__ import print_function ...@@ -17,21 +17,5 @@ from __future__ import print_function
import unittest import unittest
from paddle.fluid.tests.unittests.test_batch_norm_op import TestBatchNormOpTraining, TestBatchNormOpInference from paddle.fluid.tests.unittests.test_batch_norm_op import TestBatchNormOpTraining, TestBatchNormOpInference
class TestNGRAPHBatchNormOpTraining(TestBatchNormOpTraining):
    """Re-runs the batch_norm training tests under the nGraph engine."""

    def init_kernel_type(self):
        # Pass-through override: the nGraph variant currently reuses the
        # base kernel configuration unchanged.
        super(TestNGRAPHBatchNormOpTraining, self).init_kernel_type()
class TestNGRAPHBatchNormOpInference(TestBatchNormOpInference):
    """Re-runs the batch_norm inference tests under the nGraph engine."""

    def init_kernel_type(self):
        # Pass-through override: the nGraph variant currently reuses the
        # base kernel configuration unchanged.
        super(TestNGRAPHBatchNormOpInference, self).init_kernel_type()
class TestNGRAPHBatchNormOpWithReluInference(TestBatchNormOpInference):
    """Re-runs the batch_norm + relu inference tests under nGraph."""

    def init_kernel_type(self):
        # Pass-through override: the nGraph variant currently reuses the
        # base kernel configuration unchanged.
        super(TestNGRAPHBatchNormOpWithReluInference, self).init_kernel_type()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -17,60 +17,5 @@ from __future__ import print_function ...@@ -17,60 +17,5 @@ from __future__ import print_function
import unittest import unittest
from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp, TestWithPad, TestWithStride, TestWithGroup, TestWith1x1, TestWithInput1x1Filter1x1 from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp, TestWithPad, TestWithStride, TestWithGroup, TestWith1x1, TestWithInput1x1Filter1x1
class TestNGRAPH(TestConv2dOp):
    # Base conv2d test re-run with execution pinned to CPU via _cpu_only.
    def setUp(self):
        super(TestNGRAPH, self).setUp()
        self._cpu_only = True
    def init_kernel_type(self):
        # Keep the base kernel type unchanged.
        super(TestNGRAPH, self).init_kernel_type()
class TestNGRAPHWithPad(TestWithPad):
    # Padded conv2d variant, restricted to CPU.
    def setUp(self):
        super(TestNGRAPHWithPad, self).setUp()
        self._cpu_only = True
    def init_kernel_type(self):
        super(TestNGRAPHWithPad, self).init_kernel_type()
class TestNGRAPHWithStride(TestWithStride):
    # Strided conv2d variant, restricted to CPU.
    def setUp(self):
        super(TestNGRAPHWithStride, self).setUp()
        self._cpu_only = True
    def init_kernel_type(self):
        super(TestNGRAPHWithStride, self).init_kernel_type()
class TestNGRAPHWithGroup(TestWithGroup):
    # Grouped conv2d variant, restricted to CPU.
    def setUp(self):
        super(TestNGRAPHWithGroup, self).setUp()
        self._cpu_only = True
    def init_kernel_type(self):
        super(TestNGRAPHWithGroup, self).init_kernel_type()
class TestNGRAPHWith1x1(TestWith1x1):
    # 1x1-filter conv2d variant, restricted to CPU.
    def setUp(self):
        super(TestNGRAPHWith1x1, self).setUp()
        self._cpu_only = True
    def init_kernel_type(self):
        super(TestNGRAPHWith1x1, self).init_kernel_type()
class TestNGRAPHWithInput1x1Filter1x1(TestWithInput1x1Filter1x1):
    # 1x1-input / 1x1-filter conv2d variant, restricted to CPU.
    def setUp(self):
        super(TestNGRAPHWithInput1x1Filter1x1, self).setUp()
        self._cpu_only = True
    def init_kernel_type(self):
        super(TestNGRAPHWithInput1x1Filter1x1, self).init_kernel_type()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle.fluid.core as core
from paddle.fluid.tests.unittests.op_test import OpTest, randomize_probability
class TestCrossEntropyOp(OpTest):
    """Test cross-entropy with discrete one-hot labels.

    setUp runs the init_* hooks in a fixed order (dtype -> attrs -> sizes ->
    x -> label -> expected output); subclasses customize behavior by
    overriding the hooks, never setUp itself.
    """
    def setUp(self):
        self.op_type = "cross_entropy"
        self.soft_label = False
        self.ignore_index = -100
        self.dtype = np.float64
        self.batch_size = 30
        self.class_num = 10
        self._cpu_only = True
        # Hook order matters: later hooks read fields set by earlier ones.
        self.init_dtype_type()
        self.init_attr_type()
        self.init_bs_class_num()
        self.init_x()
        self.init_label()
        self.get_cross_entropy()
        self.inputs = {"X": self.x, "Label": self.label}
        self.outputs = {"Y": self.cross_entropy}
        self.attrs = {
            "soft_label": self.soft_label,
            "ignore_index": self.ignore_index
        }
    def init_x(self):
        # Each row of x is a probability distribution over class_num classes.
        self.x = randomize_probability(
            self.batch_size, self.class_num, dtype=self.dtype)
    def init_label(self):
        # Hard labels: one class index per instance, shape (batch_size, 1).
        self.label = np.random.randint(
            0, self.class_num, (self.batch_size, 1), dtype="int64")
    def get_cross_entropy(self):
        # Reference value: -log(p[label]) for each row.
        self.cross_entropy = np.asmatrix(
            [[-np.log(self.x[i][self.label[i][0]])]
             for i in range(self.x.shape[0])],
            dtype="float64")
    def init_attr_type(self):
        # Hook: subclasses set soft_label / ignore_index here.
        pass
    def init_dtype_type(self):
        # Hook: subclasses override the dtype here.
        pass
    def init_bs_class_num(self):
        # Hook: subclasses override batch_size / class_num here.
        pass
    def test_check_output(self):
        self.check_output()
    def test_check_grad(self):
        self.check_grad(["X"], "Y", numeric_grad_delta=0.001)
class TestCrossEntropyOp2(TestCrossEntropyOp):
    """Test cross-entropy with vectorized soft labels."""

    def init_label(self):
        # Draw positive weights, then normalize each row into a distribution.
        self.label = np.random.uniform(
            0.1, 1.0, [self.batch_size, self.class_num]).astype(self.dtype)
        self.label /= self.label.sum(axis=1, keepdims=True)

    def get_cross_entropy(self):
        # Soft-label cross entropy: sum over classes of -q * log(p).
        per_class = -self.label * np.log(self.x)
        self.cross_entropy = per_class.sum(
            axis=1, keepdims=True).astype(self.dtype)

    def init_attr_type(self):
        self.soft_label = True

    def init_dtype_type(self):
        self.dtype = np.float32

    def init_bs_class_num(self):
        self.batch_size = 5
        self.class_num = 37

    def test_check_grad(self):
        self.check_grad(
            ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001)
class TestCrossEntropyOp3(TestCrossEntropyOp):
    """Test cross-entropy with vectorized one-hot representation of labels."""

    def init_label(self):
        # Draw one class index per instance, then expand to a one-hot matrix.
        self.label_index = np.random.randint(0, self.class_num,
                                             (self.batch_size))
        one_hot = np.zeros(self.x.shape).astype(self.dtype)
        one_hot[np.arange(self.batch_size), self.label_index] = 1
        self.label = one_hot

    def get_cross_entropy(self):
        # Reference value: -log(p[label]) for each row.
        rows = [[-np.log(self.x[r][self.label_index[r]])]
                for r in range(self.x.shape[0])]
        self.cross_entropy = np.asmatrix(rows).astype(self.dtype)

    def init_attr_type(self):
        self.soft_label = True

    def init_dtype_type(self):
        self.dtype = np.float32

    def init_bs_class_num(self):
        self.batch_size = 5
        self.class_num = 17

    def test_check_grad(self):
        self.check_grad(
            ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001)
class TestCrossEntropyOp4(TestCrossEntropyOp):
    """Test high rank tensor cross-entropy with discrete one-hot labels.
    """
    def init_x(self):
        # Build a flat 2-D probability matrix, then reshape to a rank-4 x.
        self.shape = [10, 2, 4]
        self.ins_num = np.prod(np.array(self.shape))
        self.X_2d = randomize_probability(self.ins_num,
                                          self.class_num).astype(self.dtype)
        self.x = self.X_2d.reshape(self.shape + [self.class_num])
    def init_label(self):
        # Labels are generated flat and reshaped to match x's leading dims.
        self.label_2d = np.random.randint(
            0, self.class_num, (self.ins_num, 1), dtype="int64")
        self.label = self.label_2d.reshape(self.shape + [1])
    def get_cross_entropy(self):
        # Compute per-instance -log(p[label]) on the flat view, then reshape.
        cross_entropy_2d = np.asmatrix(
            [[-np.log(self.X_2d[i][self.label_2d[i][0]])]
             for i in range(self.X_2d.shape[0])]).astype(self.dtype)
        self.cross_entropy = np.array(cross_entropy_2d).reshape(self.shape +
                                                                [1])
    def init_attr_type(self):
        self.soft_label = False
    def init_dtype_type(self):
        self.dtype = np.float64
    def init_bs_class_num(self):
        self.class_num = 10
class TestCrossEntropyOp5(TestCrossEntropyOp):
    """Test high rank tensor cross-entropy with vectorized soft labels.
    """
    def init_x(self):
        # Build a flat 2-D probability matrix, then reshape to a rank-3 x.
        self.shape = [4, 3]
        self.ins_num = np.prod(np.array(self.shape))
        self.X_2d = randomize_probability(self.ins_num,
                                          self.class_num).astype(self.dtype)
        self.x = self.X_2d.reshape(self.shape + [self.class_num])
    def init_label(self):
        # Soft labels: normalize random positive weights row-wise, then
        # reshape to match x's leading dims.
        self.label_2d = np.random.uniform(
            0.1, 1.0, [self.ins_num, self.class_num]).astype(self.dtype)
        self.label_2d /= self.label_2d.sum(axis=1, keepdims=True)
        self.label = self.label_2d.reshape(self.shape + [self.class_num])
    def get_cross_entropy(self):
        # Soft-label cross entropy on the flat view, reshaped afterwards.
        cross_entropy_2d = (-self.label_2d * np.log(self.X_2d)).sum(
            axis=1, keepdims=True).astype(self.dtype)
        self.cross_entropy = np.array(cross_entropy_2d).reshape(self.shape +
                                                                [1])
    def init_attr_type(self):
        self.soft_label = True
    def init_dtype_type(self):
        self.dtype = np.float32
    def init_bs_class_num(self):
        self.class_num = 37
    def test_check_grad(self):
        self.check_grad(
            ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001)
class TestCrossEntropyOp6(TestCrossEntropyOp):
    """Test high rank tensor cross-entropy with vectorized one-hot representation of labels.
    """
    def init_x(self):
        # Build a flat 2-D probability matrix, then reshape to a rank-4 x.
        self.shape = [4, 3, 2]
        self.ins_num = np.prod(np.array(self.shape))
        self.X_2d = randomize_probability(self.ins_num,
                                          self.class_num).astype(self.dtype)
        self.x = self.X_2d.reshape(self.shape + [self.class_num])
    def init_label(self):
        # One-hot labels built flat from per-instance class indices, then
        # reshaped to match x's leading dims.
        self.label_index_2d = np.random.randint(
            0, self.class_num, (self.ins_num), dtype="int64")
        label_2d = np.zeros(self.X_2d.shape)
        label_2d[np.arange(self.ins_num), self.label_index_2d] = 1
        self.label = label_2d.reshape(self.shape + [self.class_num]).astype(
            self.dtype)
    def get_cross_entropy(self):
        # -log(p[label]) per flat instance, reshaped to x's leading dims.
        cross_entropy_2d = np.asmatrix(
            [[-np.log(self.X_2d[i][self.label_index_2d[i]])]
             for i in range(self.X_2d.shape[0])])
        self.cross_entropy = np.array(cross_entropy_2d).reshape(
            self.shape + [1]).astype(self.dtype)
    def init_attr_type(self):
        self.soft_label = True
    def init_dtype_type(self):
        self.dtype = np.float32
    def init_bs_class_num(self):
        self.class_num = 17
    def test_check_grad(self):
        self.check_grad(
            ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001)
class TestCrossEntropyOp7(TestCrossEntropyOp):
    """Test cross-entropy with ignore index."""

    def init_label(self):
        self.label = np.random.randint(
            0, self.class_num, (self.batch_size, 1), dtype="int64")

    def get_cross_entropy(self):
        # -log(p[label]) per row, except rows whose label equals the
        # ignore index, which contribute zero loss.
        rows = []
        for i in range(self.x.shape[0]):
            cls = self.label[i][0]
            if cls == self.ignore_index:
                rows.append([0])
            else:
                rows.append([-np.log(self.x[i][cls])])
        self.cross_entropy = np.asmatrix(rows).astype(self.dtype)

    def init_attr_type(self):
        self.soft_label = False
        self.ignore_index = 3

    def init_dtype_type(self):
        self.dtype = np.float64

    def init_bs_class_num(self):
        self.batch_size = 30
        self.class_num = 10
if __name__ == "__main__":
unittest.main()
...@@ -13,18 +13,9 @@ ...@@ -13,18 +13,9 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import unittest
from paddle.fluid.tests.unittests.test_elementwise_add_op import TestElementwiseAddOp
class TestNGRAPHElementwiseAddOp(TestElementwiseAddOp):
    # Base elementwise_add test re-run with execution pinned to CPU.
    def setUp(self):
        super(TestNGRAPHElementwiseAddOp, self).setUp()
        self._cpu_only = True
    def init_input_output(self):
        # Keep the base inputs/outputs unchanged (pure delegation).
        super(TestNGRAPHElementwiseAddOp, self).init_input_output()
import unittest
from paddle.fluid.tests.unittests.test_elementwise_add_op import TestElementwiseAddOp, TestElementwiseAddOp_broadcast_0
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -13,24 +13,34 @@ ...@@ -13,24 +13,34 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import numpy as np
from paddle.fluid.tests.unittests.test_fill_constant_op import TestFillConstantOp1, TestFillConstantOp2, TestFillConstantOpWithSelectedRows from paddle.fluid.tests.unittests.test_fill_constant_op import TestFillConstantOp1, TestFillConstantOp2, TestFillConstantOpWithSelectedRows
class TestNGRAPHFillConstantOp1(TestFillConstantOp1): class TestNGRAPHFillConstantFP64(TestFillConstantOp1):
def setUp(self): def setUp(self):
super(TestNGRAPHFillConstantOp1, self).setUp() super(TestNGRAPHFillConstantFP64, self).setUp()
self.attrs = {'shape': [123, 92], 'value': 3.8, 'dtype': 6}
self.outputs = {'Out': np.full((123, 92), 3.8)}
class TestNGRAPHFillConstantOp2(TestFillConstantOp2): class TestNGRAPHFillConstantINT32(TestFillConstantOp2):
def setUp(self): def setUp(self):
super(TestNGRAPHFillConstantOp2, self).setUp() super(TestNGRAPHFillConstantINT32, self).setUp()
self.attrs = {'shape': [123, 92], 'dtype': 2}
self.outputs = {'Out': np.full((123, 92), 0)}
class TestNGRAPHFillConstantOpWithSelectedRows(
TestFillConstantOpWithSelectedRows): class TestNGRAPHFillConstantINT64(TestFillConstantOp2):
def setUp(self): def setUp(self):
super(TestFillConstantOpWithSelectedRows, self).setUp() super(TestNGRAPHFillConstantINT64, self).setUp()
self.attrs = {'shape': [123, 92], 'dtype': 3}
self.outputs = {'Out': np.full((123, 92), 0)}
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -16,12 +16,5 @@ from __future__ import print_function ...@@ -16,12 +16,5 @@ from __future__ import print_function
import unittest import unittest
from paddle.fluid.tests.unittests.test_mean_op import TestMeanOp from paddle.fluid.tests.unittests.test_mean_op import TestMeanOp
class TestNGRAPHMeanOp(TestMeanOp):
    # Base mean test re-run with execution pinned to CPU.
    def setUp(self):
        super(TestNGRAPHMeanOp, self).setUp()
        self._cpu_only = True
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -15,39 +15,7 @@ ...@@ -15,39 +15,7 @@
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import numpy as np from paddle.fluid.tests.unittests.test_mul_op import TestMulOp, TestMulOp2
from paddle.fluid.tests.unittests.op_test import OpTest
class TestNGRAPHMulOp(OpTest):
    """Exercises the "mul" op on a small random (2x4) x (4x4) matmul."""

    def setUp(self):
        self.op_type = "mul"
        self.dtype = np.float32
        self.init_dtype_type()
        # Draw X then Y so the RNG consumption order stays fixed.
        x = np.random.random((2, 4)).astype(self.dtype)
        y = np.random.random((4, 4)).astype(self.dtype)
        self.inputs = {'X': x, 'Y': y}
        # Reference output is the plain numpy matrix product.
        self.outputs = {'Out': np.dot(x, y)}
        self._cpu_only = True

    def init_dtype_type(self):
        # Hook for subclasses that want to exercise another dtype.
        pass

    def test_check_output(self):
        self.check_output()

    def test_check_grad_normal(self):
        self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5)

    def test_check_grad_ingore_x(self):
        self.check_grad(
            ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))

    def test_check_grad_ingore_y(self):
        self.check_grad(
            ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -14,61 +14,25 @@ ...@@ -14,61 +14,25 @@
from __future__ import print_function from __future__ import print_function
from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5 import unittest
class TestNGRAPHPool2D_Op(TestPool2D_Op):
    # Base pool2d test re-run with execution pinned to CPU.
    def setUp(self):
        super(TestNGRAPHPool2D_Op, self).setUp()
        self._cpu_only = True
    def init_test_case(self):
        # Keep the base test-case configuration (pure delegation).
        super(TestNGRAPHPool2D_Op, self).init_test_case()
class TestNGRAPHCase1(TestCase1):
    # Pool2d case 1 re-run, restricted to CPU.
    def setUp(self):
        super(TestNGRAPHCase1, self).setUp()
        self._cpu_only = True
    def init_test_case(self):
        super(TestNGRAPHCase1, self).init_test_case()
from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5
class TestNGRAPHCase2(TestCase2):
    # Pool2d case 2 re-run, restricted to CPU.
    def setUp(self):
        super(TestNGRAPHCase2, self).setUp()
        self._cpu_only = True
    def init_test_case(self):
        super(TestNGRAPHCase2, self).init_test_case()
class TestNGRAPHCase3(TestCase3):
    # Pool2d case 3 re-run, restricted to CPU.
    def setUp(self):
        super(TestNGRAPHCase3, self).setUp()
        self._cpu_only = True
    def init_pool_type(self):
        # Keep the base pooling type (pure delegation).
        super(TestNGRAPHCase3, self).init_pool_type()
class TestNGRAPHCase4(TestCase4): class TestNGRAPHCeilMode(TestCase1):
def setUp(self): def setUp(self):
super(TestNGRAPHCase4, self).setUp() super(TestNGRAPHCeilMode, self).setUp()
self._cpu_only = True
def init_pool_type(self): def init_ceil_mode(self):
super(TestNGRAPHCase4, self).init_pool_type() self.ceil_mode = True
class TestNGRAPHCase5(TestCase5): class TestNGRAPHAdaptive(TestCase1):
def setUp(self): def setUp(self):
super(TestNGRAPHCase5, self).setUp() super(TestNGRAPHAdaptive, self).setUp()
self._cpu_only = True
def init_pool_type(self): def init_adaptive(self):
super(TestNGRAPHCase5, self).init_pool_type() self.adaptive = True
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -15,24 +15,5 @@ from __future__ import print_function ...@@ -15,24 +15,5 @@ from __future__ import print_function
import unittest import unittest
from paddle.fluid.tests.unittests.test_scale_op import TestScaleOp, TestScaleOpSelectedRows from paddle.fluid.tests.unittests.test_scale_op import TestScaleOp, TestScaleOpSelectedRows
class TestNGRAPHScaleOp(TestScaleOp):
    # Base scale test re-run with execution pinned to CPU.
    def setUp(self):
        super(TestNGRAPHScaleOp, self).setUp()
        self._cpu_only = True
    def init_dtype_type(self):
        # Keep the base dtype.
        pass
class TestNGRAPHScaleOpSelectedRows(TestScaleOpSelectedRows):
    # SelectedRows scale test re-run with execution pinned to CPU.
    def setUp(self):
        super(TestNGRAPHScaleOpSelectedRows, self).setUp()
        self._cpu_only = True
    def init_dtype_type(self):
        # Keep the base dtype.
        pass
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -16,11 +16,5 @@ from __future__ import print_function ...@@ -16,11 +16,5 @@ from __future__ import print_function
import unittest import unittest
from paddle.fluid.tests.unittests.test_softmax_op import TestSoftmaxOp from paddle.fluid.tests.unittests.test_softmax_op import TestSoftmaxOp
class TestSoftmaxNGRAPHOp(TestSoftmaxOp):
    # Re-runs the reference softmax test unchanged (pure delegation).
    def setUp(self):
        super(TestSoftmaxNGRAPHOp, self).setUp()
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -16,30 +16,5 @@ from __future__ import print_function ...@@ -16,30 +16,5 @@ from __future__ import print_function
import unittest import unittest
from paddle.fluid.tests.unittests.test_top_k_op import TestTopkOp, TestTopkOp3d, TestTopkOp2, TestTopkOp3, TestTopkOp4 from paddle.fluid.tests.unittests.test_top_k_op import TestTopkOp, TestTopkOp3d, TestTopkOp2, TestTopkOp3, TestTopkOp4
class TestNGRAPHTopkOp(TestTopkOp):
    # Base top_k test re-run with execution pinned to CPU.
    def setUp(self):
        super(TestNGRAPHTopkOp, self).setUp()
        self._cpu_only = True
class TestNGRAPHTopkOp2(TestTopkOp2):
    # top_k variant 2 re-run, restricted to CPU.
    def setUp(self):
        super(TestNGRAPHTopkOp2, self).setUp()
        self._cpu_only = True
class TestNGRAPHTopkOp3(TestTopkOp3):
    # top_k variant 3 re-run, restricted to CPU.
    def setUp(self):
        super(TestNGRAPHTopkOp3, self).setUp()
        self._cpu_only = True
class TestNGRAPHTopkOp4(TestTopkOp4):
    # top_k variant 4 re-run, restricted to CPU.
    def setUp(self):
        super(TestNGRAPHTopkOp4, self).setUp()
        self._cpu_only = True
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
from __future__ import print_function from __future__ import print_function
import os
import unittest import unittest
import numpy as np import numpy as np
import random import random
...@@ -374,6 +375,9 @@ class OpTest(unittest.TestCase): ...@@ -374,6 +375,9 @@ class OpTest(unittest.TestCase):
return [] return []
places = [fluid.CPUPlace()] places = [fluid.CPUPlace()]
cpu_only = self._cpu_only if hasattr(self, '_cpu_only') else False cpu_only = self._cpu_only if hasattr(self, '_cpu_only') else False
use_ngraph = bool(os.getenv("FLAGS_use_ngraph", False))
if use_ngraph:
cpu_only = True
if core.is_compiled_with_cuda() and core.op_support_gpu(self.op_type)\ if core.is_compiled_with_cuda() and core.op_support_gpu(self.op_type)\
and not cpu_only: and not cpu_only:
places.append(core.CUDAPlace(0)) places.append(core.CUDAPlace(0))
......
...@@ -79,7 +79,7 @@ class TestParallelExecutorBase(unittest.TestCase): ...@@ -79,7 +79,7 @@ class TestParallelExecutorBase(unittest.TestCase):
if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce
build_strategy.fuse_elewise_add_act_ops = fuse_elewise_add_act_ops build_strategy.fuse_elewise_add_act_ops = fuse_elewise_add_act_ops
build_strategy.fuse_relu_depthwise_conv = fuse_relu_depthwise_conv build_strategy.fuse_relu_depthwise_conv = fuse_relu_depthwise_conv
build_strategy.memory_optimize = use_ir_memory_optimize build_strategy.memory_optimize = False if memory_opt else use_ir_memory_optimize
# python memory optimization is conflict with inplace pass. # python memory optimization is conflict with inplace pass.
# Use ir graph memory optimization after inplace pass is the correct way. # Use ir graph memory optimization after inplace pass is the correct way.
build_strategy.enable_inplace = False if memory_opt else enable_inplace build_strategy.enable_inplace = False if memory_opt else enable_inplace
......
...@@ -121,6 +121,8 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -121,6 +121,8 @@ class TestMNIST(TestParallelExecutorBase):
regularization=fluid.regularizer.L2Decay(1e-6)) regularization=fluid.regularizer.L2Decay(1e-6))
return optimizer return optimizer
# NOTE(dzh):
# need to make it compatible with elewise fuse act
not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence(
model, model,
feed_dict={"image": img, feed_dict={"image": img,
...@@ -128,6 +130,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -128,6 +130,7 @@ class TestMNIST(TestParallelExecutorBase):
use_cuda=use_cuda, use_cuda=use_cuda,
fuse_elewise_add_act_ops=False, fuse_elewise_add_act_ops=False,
memory_opt=False, memory_opt=False,
use_ir_memory_optimize=False,
optimizer=_optimizer) optimizer=_optimizer)
fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence(
model, model,
...@@ -136,6 +139,7 @@ class TestMNIST(TestParallelExecutorBase): ...@@ -136,6 +139,7 @@ class TestMNIST(TestParallelExecutorBase):
use_cuda=use_cuda, use_cuda=use_cuda,
fuse_elewise_add_act_ops=True, fuse_elewise_add_act_ops=True,
memory_opt=False, memory_opt=False,
use_ir_memory_optimize=False,
optimizer=_optimizer) optimizer=_optimizer)
for loss in zip(not_fuse_op_first_loss, fuse_op_first_loss): for loss in zip(not_fuse_op_first_loss, fuse_op_first_loss):
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import paddle.fluid as fluid
import paddle.fluid.core as core
os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
os.environ[
'RECORDIO_FILENAME'] = '/tmp/ir_memory_optimize_transformer.wmt16.recordio'
from test_parallel_executor_transformer import TestTransformer
from test_parallel_executor_transformer import transformer
# NOTE(dzhwinter): test diferent strategy colisions.
# open the eager delete tensor strategy by default.
class TestTransformerWithIR(TestTransformer):
    # Runs the transformer convergence check twice so the python-transpiler
    # memory optimization and the IR-pass memory optimization are each
    # exercised on their own (only on CUDA builds).
    def test_main(self):
        if core.is_compiled_with_cuda():
            # check python transpiler
            self.check_network_convergence(
                transformer,
                use_cuda=True,
                memory_opt=True,
                use_ir_memory_optimize=False)
            # check IR memory optimize
            self.check_network_convergence(
                transformer,
                use_cuda=True,
                memory_opt=False,
                use_ir_memory_optimize=True)
if __name__ == '__main__':
unittest.main()
requests==2.9.2 requests==2.9.2
numpy>=1.12 numpy>=1.12
protobuf==3.1 protobuf>=3.6
recordio>=0.1.0 recordio>=0.1.0
matplotlib==2.2.3 # TODO: let python3 paddlepaddle package use latest matplotlib matplotlib==2.2.3 # TODO: let python3 paddlepaddle package use latest matplotlib
rarfile rarfile
......
...@@ -31,10 +31,10 @@ RUN wget --no-check-certificate -qO- https://storage.googleapis.com/golang/go1.8 ...@@ -31,10 +31,10 @@ RUN wget --no-check-certificate -qO- https://storage.googleapis.com/golang/go1.8
ENV GOROOT=/usr/local/go GOPATH=/root/gopath ENV GOROOT=/usr/local/go GOPATH=/root/gopath
ENV PATH=${GOROOT}/bin:${GOPATH}/bin:${PATH} ENV PATH=${GOROOT}/bin:${GOPATH}/bin:${PATH}
# protobuf 3.1.0 # protobuf 3.6.1
RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf/releases/download/v3.1.0/protobuf-cpp-3.1.0.tar.gz && \ RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf/releases/download/v3.6.1/protobuf-cpp-3.6.1.tar.gz && \
tar xzf protobuf-cpp-3.1.0.tar.gz && \ tar xzf protobuf-cpp-3.6.1.tar.gz && \
cd protobuf-3.1.0 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.1.0.tar.gz cd protobuf-3.6.1 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.6.1.tar.gz
RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root/requirements.txt RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root/requirements.txt
......
...@@ -17,7 +17,7 @@ OPENSSL_ROOT=openssl-1.1.0i ...@@ -17,7 +17,7 @@ OPENSSL_ROOT=openssl-1.1.0i
OPENSSL_HASH=ebbfc844a8c8cc0ea5dc10b86c9ce97f401837f3fa08c17b2cdadc118253cf99 OPENSSL_HASH=ebbfc844a8c8cc0ea5dc10b86c9ce97f401837f3fa08c17b2cdadc118253cf99
EPEL_RPM_HASH=e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d EPEL_RPM_HASH=e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d
DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
PATCHELF_HASH=d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb PATCHELF_HASH=f2aa40a6148cb3b0ca807a1bf836b081793e55ec9e5540a5356d800132be7e0a
CURL_ROOT=curl-7.49.1 CURL_ROOT=curl-7.49.1
CURL_HASH=eb63cec4bef692eab9db459033f409533e6d10e20942f4b060b32819e81885f1 CURL_HASH=eb63cec4bef692eab9db459033f409533e6d10e20942f4b060b32819e81885f1
AUTOCONF_ROOT=autoconf-2.69 AUTOCONF_ROOT=autoconf-2.69
...@@ -107,11 +107,11 @@ curl-config --features ...@@ -107,11 +107,11 @@ curl-config --features
rm -rf /usr/local/ssl rm -rf /usr/local/ssl
# Install patchelf (latest with unreleased bug fixes) # Install patchelf (latest with unreleased bug fixes)
curl -sLO http://nipy.bic.berkeley.edu/manylinux/patchelf-0.9njs2.tar.gz curl -sLO https://nixos.org/releases/patchelf/patchelf-0.9/patchelf-0.9.tar.gz
check_sha256sum patchelf-0.9njs2.tar.gz $PATCHELF_HASH check_sha256sum patchelf-0.9.tar.gz $PATCHELF_HASH
tar -xzf patchelf-0.9njs2.tar.gz tar -xzf patchelf-0.9.tar.gz
(cd patchelf-0.9njs2 && ./configure && make && make install) (cd patchelf-0.9 && ./configure && make && make install)
rm -rf patchelf-0.9njs2.tar.gz patchelf-0.9njs2 rm -rf patchelf-0.9.tar.gz patchelf-0.9
# Install latest pypi release of auditwheel # Install latest pypi release of auditwheel
LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib" $PY35_BIN/pip install auditwheel LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib" $PY35_BIN/pip install auditwheel
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册