diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index e05b7694ddf1e1652b00f156cde1a2d433c9fc46..3da3f10d7c91094ff8ae7daeadfb55e4e44eb731 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -203,7 +203,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ENDIF() SET(PROTOBUF_REPO "https://github.com/google/protobuf.git") - SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546") + SET(PROTOBUF_TAG "v3.6.1") ExternalProject_Add( ${TARGET_NAME} @@ -231,7 +231,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ) ENDFUNCTION() -SET(PROTOBUF_VERSION 3.1) +SET(PROTOBUF_VERSION 3.6.1) IF(NOT PROTOBUF_FOUND) build_protobuf(extern_protobuf FALSE) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index 623c53f4f75bbd217c157bcdda0cb12c510269ee..351e7fa3ce227ee3a92f527fc49c873e52342594 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -74,8 +74,8 @@ IF(PYTHONINTERP_FOUND) find_python_module(wheel REQUIRED) find_python_module(google.protobuf REQUIRED) FIND_PACKAGE(NumPy REQUIRED) - IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") - MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, " + IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.6.1") + MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.6.1, " "please use pip to upgrade protobuf. pip install -U protobuf") ENDIF() ENDIF(PYTHONINTERP_FOUND) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 21a9b6e0ae0d0349236db5a829a1dc119f49241a..9d15fada6d917fa967c9fdff7854be5c28ded607 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -262,7 +262,7 @@ paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=N paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None)) -paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords='ignored', defaults=(None,)) +paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)) @@ -474,11 +474,11 @@ paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_ paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]] paddle.fluid.LoDTensor.recursive_sequence_lengths recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> List[List[int]] paddle.fluid.LoDTensor.set 1. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CPUPlace) -> None 2. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CPUPlace) -> None 3. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CPUPlace) -> None 4. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CPUPlace) -> None 5. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CPUPlace) -> None 6. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CPUPlace) -> None 7. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CPUPlace) -> None 8. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CPUPlace) -> None 9. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPlace) -> None 10. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPlace) -> None 11. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPlace) -> None 12. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPlace) -> None 13. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPlace) -> None 14. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPlace) -> None 15. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPlace) -> None 16. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPlace) -> None 17. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPinnedPlace) -> None 18. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPinnedPlace) -> None 19. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPinnedPlace) -> None 20. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPinnedPlace) -> None 21. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPinnedPlace) -> None 22. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPinnedPlace) -> None 23. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPinnedPlace) -> None 24. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPinnedPlace) -> None -paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None -paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None +paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor, lod: List[List[int]]) -> None +paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor, recursive_sequence_lengths: List[List[int]]) -> None paddle.fluid.LoDTensor.shape shape(self: paddle.fluid.core.Tensor) -> List[int] paddle.fluid.LoDTensorArray.__init__ __init__(self: paddle.fluid.core.LoDTensorArray) -> None -paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray, arg0: paddle.fluid.core.LoDTensor) -> None +paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray, tensor: paddle.fluid.core.LoDTensor) -> None paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index e88084424baf7eb5cd1d67ea19966866d71ec3eb..dc308fd2592bb158f46f6eac9dd0df25787559fe 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -50,7 +50,12 @@ cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_ cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor) cc_library(fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope) -cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper) +if(WITH_GPU) +cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info) +else() +cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper cpu_info) +endif() + cc_library(memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass) cc_library(inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info) cc_library(modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper) diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index f8030c53f72bc8a6f007c1eb6a3072abd8037de2..0c823b9ca2a135101f7776bf0a8f94f4a93ffb03 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -240,7 +240,9 @@ std::unique_ptr BuildStrategy::Apply( continue; } } + VLOG(3) << "Start Apply Pass " << pass->Type(); graph = pass->Apply(std::move(graph)); + VLOG(3) << "Finish Apply Pass " << pass->Type(); } return graph; } diff --git a/paddle/fluid/framework/details/inplace_op_pass.cc b/paddle/fluid/framework/details/inplace_op_pass.cc index b0c5968499be3a959dd6103424f25056a6dc2282..c91fc81b2defc9fe6b5720ce652a9aa94b27735e 100644 --- a/paddle/fluid/framework/details/inplace_op_pass.cc +++ b/paddle/fluid/framework/details/inplace_op_pass.cc @@ -49,7 +49,7 @@ DEFINE_bool( "If this option turns on, only these op in whitelist can be inplaced." "If it turns off, all of the running op can be candidate of inplaced op." "Such as scale, elementwise_add" - "By default, it's turned on"); + "By default, it's turned off"); DECLARE_string(memory_optimize_debug); diff --git a/paddle/fluid/framework/details/memory_optimize_helper.cc b/paddle/fluid/framework/details/memory_optimize_helper.cc index 6345ba335997ec42ebc63f90e9bf6a3ed2648edc..6126c168ccf4b52ae75648d2dfe48db770639214 100644 --- a/paddle/fluid/framework/details/memory_optimize_helper.cc +++ b/paddle/fluid/framework/details/memory_optimize_helper.cc @@ -13,13 +13,19 @@ // limitations under the License. #include "paddle/fluid/framework/details/memory_optimize_helper.h" +#include #include #include -#include +#include #include #include #include #include "paddle/fluid/framework/var_desc.h" +#include "paddle/fluid/platform/cpu_info.h" + +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/gpu_info.h" +#endif // PADDLE_WITH_CUDA namespace paddle { namespace framework { @@ -166,6 +172,11 @@ struct NodeComparator { bool operator()(ir::Node* lhs, ir::Node* rhs) const { auto* lhs_desc = FindVarDescInBlock(lhs); auto* rhs_desc = FindVarDescInBlock(rhs); + // match data type + if (lhs_desc->GetDataType() != rhs_desc->GetDataType()) { + return false; + } + // match shape auto lhs_shape = lhs_desc->GetShape(); auto rhs_shape = rhs_desc->GetShape(); if ((lhs_shape[0] == -1 && rhs_shape[0] == -1) || @@ -230,6 +241,27 @@ ir::Node* OrderedSet::FindBestFitNode(ir::Node* var) const { return found_node; } +ir::Node* OrderedSet::FindNextBestFitNode(ir::Node* var, ir::Node* prev) const { + ir::Node* found_node = nullptr; + NodeComparator functor; + auto it = + std::find_if(nodes_.begin(), nodes_.end(), [&](const NodeVector& v) { + if (v.front() == prev) + return true; + else + return false; + }); + PADDLE_ENFORCE(it != nodes_.end(), "Not found previous in node list!"); + for (it = std::next(it); it != nodes_.end(); ++it) { + auto& candidate = it->front(); + if (functor(var, candidate)) { + found_node = candidate; + break; + } + } + return found_node; +} + bool OrderedSet::Has(ir::Node* var) const { if (mark_table_.count(var->Name())) { auto& node_in_samename = mark_table_.at(var->Name()); @@ -241,10 +273,15 @@ bool OrderedSet::Has(ir::Node* var) const { return false; } +void OrderedSet::Erase(const std::string& var) { + PADDLE_ENFORCE(mark_table_.count(var)); + nodes_.erase(mark_table_[var]); + mark_table_.erase(var); +} + void OrderedSet::Erase(ir::Node* var) { - PADDLE_ENFORCE(mark_table_.count(var->Name())); - nodes_.erase(mark_table_[var->Name()]); - mark_table_.erase(var->Name()); + PADDLE_ENFORCE(var != nullptr); + Erase(var->Name()); } std::string OrderedSet::ToString() const { @@ -274,14 +311,35 @@ bool NodeCanReused(ir::Node* node) { return flag; } +int MinChunkSize() { + int size{0}; +#ifdef PADDLE_WITH_CUDA + size = platform::GpuMinChunkSize(); +#else + size = platform::CpuMinChunkSize(); +#endif // PADDLE_WITH_CUDA + return size; +} + bool NodeCanReused(const VarDesc& node) { auto type = node.GetType(); + // only these types holds bulk of gpu memory if (!(type == proto::VarType::LOD_TENSOR || type == proto::VarType::SELECTED_ROWS || type == proto::VarType::LOD_TENSOR_ARRAY)) { return false; } - if (node.Persistable() || node.GetShape().empty()) { + // persistable variable is parameter + if (node.Persistable()) { + return false; + } + // shape < min_chunk_size is meaningless. + // further more, fetched loss always has size = 1 + // which should not be reused. + auto shape = node.GetShape(); + int size = std::abs( + std::accumulate(shape.begin(), shape.end(), 1, std::multiplies())); + if (shape.empty() || size < MinChunkSize()) { return false; } // vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad @@ -461,7 +519,9 @@ ir::Node* ControlFlowGraph::GetNodeByName(const std::string& name, for (auto* node : ops_) { if (node == op) break; for (auto& output : node->outputs) { - if (output->Name() == name) { + PADDLE_ENFORCE((output != nullptr && output->IsVar()), + "Output is empty!"); + if (output->Var() && output->Name() == name) { found_node = output; } } diff --git a/paddle/fluid/framework/details/memory_optimize_helper.h b/paddle/fluid/framework/details/memory_optimize_helper.h index 0bfaf827fea84030de48a9984197f5b39f5c9261..dba96309fdf37e6ca73780cb194843659e1bd2d1 100644 --- a/paddle/fluid/framework/details/memory_optimize_helper.h +++ b/paddle/fluid/framework/details/memory_optimize_helper.h @@ -55,6 +55,7 @@ class OrderedSet { void Insert(ir::Node* var); void Erase(ir::Node* var); + void Erase(const std::string& var); bool Has(ir::Node* var) const; void Clear() { mark_table_.clear(); @@ -62,6 +63,7 @@ class OrderedSet { } // find the bestfit shape node block with var. ir::Node* FindBestFitNode(ir::Node* var) const; + ir::Node* FindNextBestFitNode(ir::Node* var, ir::Node* prev) const; // map store non-const iterator, can not promise const int GetNodeIndexInPool(ir::Node* var); // pool all node to string diff --git a/paddle/fluid/framework/details/memory_optimize_helper_test.cc b/paddle/fluid/framework/details/memory_optimize_helper_test.cc index 5c13dda9e5491044d2bcbed4b24d438cc8b8a413..3cfe297a73cf4128b7191cbd432cdceadc6240ec 100644 --- a/paddle/fluid/framework/details/memory_optimize_helper_test.cc +++ b/paddle/fluid/framework/details/memory_optimize_helper_test.cc @@ -107,6 +107,52 @@ TEST(OrderedSet, Normal) { ASSERT_EQ(pool.GetNodeIndexInPool(cache), 5); // match 4:[5,2] } } + +TEST(OrderedSet, FindBestFitNode) { + OrderedSet pool; + std::vector> nodes; + ProgramDesc prog; + BlockDesc* block_desc = prog.MutableBlock(0); + auto* op_desc = block_desc->AppendOp(); + op_desc->SetType("dummy"); + std::unique_ptr op = ir::CreateNodeForTest(op_desc); + + { + auto desc = block_desc->Var("a"); + desc->SetShape({128, 128}); + std::unique_ptr node = ir::CreateNodeForTest(desc); + node->inputs.emplace_back(op.get()); + nodes.emplace_back(std::move(node)); + } + { + auto desc = block_desc->Var("b"); + desc->SetShape({128, 129}); + std::unique_ptr node = ir::CreateNodeForTest(desc); + node->inputs.emplace_back(op.get()); + nodes.emplace_back(std::move(node)); + } + { + auto desc = block_desc->Var("c"); + desc->SetShape({128, 128}); + std::unique_ptr node = ir::CreateNodeForTest(desc); + node->inputs.emplace_back(op.get()); + nodes.emplace_back(std::move(node)); + } + + for (auto& node : nodes) { + pool.Insert(node.get()); + } + + // FindNextBestFitNode + auto* n = nodes[0].get(); + auto* cache = pool.FindBestFitNode(n); + PADDLE_ENFORCE(cache->Name() == "a"); + cache = pool.FindNextBestFitNode(n, cache); + PADDLE_ENFORCE(cache->Name() == "c"); + cache = pool.FindNextBestFitNode(n, cache); + PADDLE_ENFORCE(cache->Name() == "b"); +} + } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/memory_optimize_pass.cc b/paddle/fluid/framework/details/memory_optimize_pass.cc index 41e4a834df0abab069ab1f6cd21c8479b911250c..b35b967c72dcec7c427f6921c094f7a87d536f25 100644 --- a/paddle/fluid/framework/details/memory_optimize_pass.cc +++ b/paddle/fluid/framework/details/memory_optimize_pass.cc @@ -69,55 +69,59 @@ std::unique_ptr MemoryOptimizePass::ApplyImpl( } for (auto& var : op->outputs) { - if (!NodeCanReused(var) || cfg_->Use(op).count(var->Name()) == 0 || - skip_set_.count(var->Name())) + if (var->IsVar() && !var->IsCtrlVar() && skip_set_.count(var->Name())) { + VLOG(3) << "Skip set contains variable of " << var->Name() + << "disable reuse on it. skipped"; continue; - ir::Node* cache = pool_.FindBestFitNode(var); - - if (var->Name() == FLAGS_memory_optimize_debug) { - VLOG(3) << "start match var " << DebugString(var) << " of op " - << op->Name(); - VLOG(3) << pool_.ToString(); - VLOG(3) << "matched in pool : " - << ((cache == nullptr) ? "False" : "True"); } + if (NodeCanReused(var) && cfg_->Use(op).count(var->Name()) == 0) { + ir::Node* cache = pool_.FindBestFitNode(var); + while (cache != nullptr && var->Name() == cache->Name()) { + VLOG(3) << "The same cache variable is cascade reused. " + << cache->Name() << " is re-filled to the pool after " + << "the reused op is finished. Current op can not " + << "replace it again. Skip this candidate."; + cache = pool_.FindNextBestFitNode(var, cache); + } + if (var->Name() == FLAGS_memory_optimize_debug) { + VLOG(3) << "start match var " << DebugString(var) << " of op " + << op->Name(); + VLOG(3) << pool_.ToString(); + VLOG(3) << "matched in pool : " + << ((cache == nullptr) ? "False" : "True"); + } - if (cache == nullptr) continue; - if (var->Name() == cache->Name()) { - VLOG(3) << "The same cache variable is cascade reused." << var->Name() - << " is re-filled to the pool after" - << "the reused op is finished. Current op can not " - << "replace it again. Skip this candidate."; - continue; - - int node_idx_in_pool = pool_.GetNodeIndexInPool(cache); - VLOG(3) << string::Sprintf( - "!!! %s, %s => %s, cache idx %d, pool size %d", - std::to_string(reuse_id++), DebugString(var), DebugString(cache), - node_idx_in_pool, static_cast(pool_.size())); - - // update CFG Graph on the fly. - // reused var maybe re-fill into the pool - cfg_->RenameVarInCFGGraph(var->Name(), cache->Name(), idx); - // NOTE(dzhwinter): we need to both update the ProgramDesc - // and IR Graph. because op_desc/var_desc is used in CreateOp, - // CreateVar when running happens. But IR Graph - // define the dependence relationship between nodes. - RenameVarInGraphDesc(var->Name(), cache->Name(), idx); - RenameVarInGraphNode(var->Name(), cache->Name(), idx, graph.get()); - - pool_.Erase(cache); - } + if (cache != nullptr) { + int node_idx_in_pool = pool_.GetNodeIndexInPool(cache); + VLOG(3) << string::Sprintf( + "!!! %s, %s => %s, cache idx %d, pool size %d", + std::to_string(reuse_id++), DebugString(var), DebugString(cache), + node_idx_in_pool, static_cast(pool_.size())); + // NOTE(dzhwinter): update the ProgramDesc/IR Graph + // and the CFG Graph on the fly. + // + // IR Graph define the dependence relationship between nodes. + // + // ProgramDesc defines the input/output vars. Its used in + // CreateOp, CreateVar when running happens. + // + // CFG Graph store the liveness information, when reuse happens + // we also need to update the variable liveness. + const std::string var_name = var->Name(); + const std::string cache_name = cache->Name(); - // fill the pool - std::unordered_set unlived_vars; - for (auto var : cfg_->LiveIn(op)) { - if (cfg_->LiveOut(op).count(var) == 0) { - unlived_vars.emplace(var); + cfg_->RenameVarInCFGGraph(var_name, cache_name, idx); + RenameVarInGraphDesc(var_name, cache_name, idx); + RenameVarInGraphNode(var_name, cache_name, idx, graph.get()); + pool_.Erase(cache_name); } } - for (auto var : unlived_vars) { + } + // fill the pool + for (auto var : cfg_->LiveIn(op)) { + if (cfg_->LiveOut(op).count(var) == 0) { ir::Node* var_node = cfg_->GetNodeByName(var, op); + if (var_node == nullptr || var_node->IsCtrlVar()) continue; if (NodeCanReused(var_node) && !pool_.Has(var_node)) { pool_.Insert(var_node); } @@ -273,8 +277,7 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var, // redirect the input to the latest version of cache_var for (auto* node : op->inputs) { if (node->Name() == var) { - ir::Node* cache_node = graph->CreateVarNode(var_desc.get()); - var_nodes_[cache_var].emplace_back(cache_node); + ir::Node* cache_node = var_nodes_[cache_var].back(); // swap node to cache_node cache_node->outputs.insert(cache_node->outputs.end(), @@ -283,11 +286,15 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var, auto* prev_op = node->inputs[0]; std::replace(prev_op->outputs.begin(), prev_op->outputs.end(), node, cache_node); - cache_node->inputs.emplace_back(prev_op); for (auto* next_op : node->outputs) { std::replace(next_op->inputs.begin(), next_op->inputs.end(), node, cache_node); } + + // erase unused node + auto& nodes = var_nodes_.at(var); + nodes.erase(std::remove(nodes.begin(), nodes.end(), node), nodes.end()); + graph->RemoveNode(node); } } @@ -307,15 +314,14 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var, std::replace(next_op->inputs.begin(), next_op->inputs.end(), node, cache_node); } + + // erase unused node + auto& nodes = var_nodes_.at(var); + nodes.erase(std::remove(nodes.begin(), nodes.end(), node), nodes.end()); + graph->RemoveNode(node); } } } - - // release node of unused var in graph - for (auto* node : var_nodes_[var]) { - graph->RemoveNode(node); - } - var_nodes_.at(var).clear(); } } // namespace details diff --git a/paddle/fluid/framework/inplace_op_inference_test.cc b/paddle/fluid/framework/inplace_op_inference_test.cc index 3e4d715c6f089496d1b1f7906e3f10147a073622..bf9d1dcd380cdff886301faf13b0015fd5a2ed5c 100644 --- a/paddle/fluid/framework/inplace_op_inference_test.cc +++ b/paddle/fluid/framework/inplace_op_inference_test.cc @@ -179,11 +179,11 @@ TEST(InferInplace, SingleOpInplaceInToOut) { op->SetOutput("Out", {"test2_out"}); prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); - prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 64}); + prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128}); prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_out"); - prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16}); + prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 128, 128}); auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto in_to_outs = infer_inplace(*op, op->Block()); @@ -201,11 +201,11 @@ TEST(InferInplace, SingleGradOpInplaceInToOut) { op->SetOutput(GradVarName("X"), {"test2_a", "test2_b", "test2_c"}); prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); - prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 16}); + prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 16, 1024, 1024}); prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_out"); - prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16}); + prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 1024, 1024}); auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto in_to_outs = infer_inplace(*op, op->Block()); @@ -233,12 +233,12 @@ TEST(InferInplace, MultiOutInplaceInToOut) { prog.MutableBlock(0)->Var("o0"); prog.MutableBlock(0)->Var("y0"); prog.MutableBlock(0)->Var("z0"); - prog.MutableBlock(0)->Var("a0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("b0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("c0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("o0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("y0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("z0")->SetShape({32, 16}); + prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024}); auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto in_to_outs = infer_inplace(*op, op->Block()); @@ -267,12 +267,12 @@ TEST(InferInplace, MultiGradInplaceInToOut) { prog.MutableBlock(0)->Var("o0"); prog.MutableBlock(0)->Var("y0"); prog.MutableBlock(0)->Var("z0"); - prog.MutableBlock(0)->Var("a0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("b0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("c0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("o0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("y0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("z0")->SetShape({32, 16}); + prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024}); auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto in_to_outs = infer_inplace(*op, op->Block()); diff --git a/paddle/fluid/operators/controlflow/compare_op.cc b/paddle/fluid/operators/controlflow/compare_op.cc index 688457d4a75168577302e45817ef0463d6ff3718..5d3f9b43f8c08d356319fa0b9ccaf808811d3d39 100644 --- a/paddle/fluid/operators/controlflow/compare_op.cc +++ b/paddle/fluid/operators/controlflow/compare_op.cc @@ -51,6 +51,11 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker { comment.type)); AddInput("Y", string::Sprintf("the right hand operand of %s operator", comment.type)); + AddAttr( + "axis", + "The start dimension index for broadcasting Y onto X. [default -1]") + .SetDefault(-1) + .EqualGreaterThan(-1); AddAttr("force_cpu", "Force fill output variable to cpu " "memory. Otherwise, fill output variable to the running " @@ -64,11 +69,6 @@ N-dim tensor. X and Y could be any type. The each element of the Out tensor is calculated by $%s$ )DOC", comment.equation)); - AddAttr( - "axis", - "The start dimension index for broadcasting Y onto X. [default -1]") - .SetDefault(-1) - .EqualGreaterThan(-1); } }; diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h index 758432fd9e4197302e0bd8f76a1ca7c524026a70..92345b3c0eda42f03bd48c5f64880c56ddbe351b 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h @@ -21,6 +21,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/selected_rows.h" +#include "paddle/fluid/operators/jit/kernels.h" #include "paddle/fluid/operators/math/blas.h" namespace paddle { @@ -37,32 +38,24 @@ struct EmbeddingVSumFunctor { const LoDTensor *table_t, const LoDTensor *ids_t, LoDTensor *output_t) { auto *table = table_t->data(); - int64_t row_number = table_t->dims()[0]; - int64_t row_width = table_t->dims()[1]; - int64_t last_dim = output_t->dims()[1]; + int64_t table_height = table_t->dims()[0]; + int64_t table_width = table_t->dims()[1]; + int64_t out_width = output_t->dims()[1]; const int64_t *ids = ids_t->data(); auto ids_lod = ids_t->lod()[0]; - int64_t ids_count = ids_t->numel() / ids_lod.back(); - + int64_t idx_width = ids_t->numel() / ids_lod.back(); auto *output = output_t->mutable_data(context.GetPlace()); - auto blas = math::GetBlas(context); - for (int64_t i = 0; i != ids_lod.size() - 1; ++i) { - size_t begin = ids_lod[i] * ids_count; - for (int64_t j = 0; j != ids_count; ++j) { - PADDLE_ENFORCE_LT(ids[begin], row_number); - PADDLE_ENFORCE_GE(ids[begin], 0, "ids %d", i); - blas.VCOPY(row_width, table + ids[begin + j] * row_width, - output + i * last_dim + j * row_width); - } + PADDLE_ENFORCE_LE(table_width * idx_width, out_width); - for (int64_t r = (ids_lod[i] + 1) * ids_count; - r < ids_lod[i + 1] * ids_count; ++r) { - PADDLE_ENFORCE_LT(ids[r], row_number); - PADDLE_ENFORCE_GE(ids[r], 0, "ids %d", i); - blas.AXPY(row_width, 1., table + ids[r] * row_width, - output + i * last_dim + (r % ids_count) * row_width); - } + jit::emb_seq_pool_attr_t attr(table_height, table_width, 0, idx_width, + out_width, jit::SeqPoolType::kSum); + for (int64_t i = 0; i != ids_lod.size() - 1; ++i) { + attr.index_height = ids_lod[i + 1] - ids_lod[i]; + auto emb_seqpool = jit::Get, + platform::CPUPlace>(attr); + emb_seqpool(table, ids + ids_lod[i] * idx_width, output + i * out_width, + &attr); } } }; diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc index 77a2d04ebf176ca0807fa155f87261fcd838da04..3348778ee782ef0cdd1df4c3c4b24060436d7d79 100644 --- a/paddle/fluid/operators/jit/benchmark.cc +++ b/paddle/fluid/operators/jit/benchmark.cc @@ -301,6 +301,37 @@ void BenchSeqPoolKernel() { } } +template +void BenchEmbSeqPoolKernel() { + std::vector pool_types = {jit::SeqPoolType::kSum}; + int64_t tbl_h = 1e4; + for (int tbl_w : {10, 16, 256}) { + Tensor table; + table.Resize({tbl_h, tbl_w}); + RandomVec(tbl_h * tbl_w, table.mutable_data(PlaceType()), -2.f, 2.f); + const T* table_data = table.data(); + for (auto type : pool_types) { + for (int idx_w : {1, 2, 10, 16}) { + for (int idx_h : {1, 2, 9, 13, 16}) { + int64_t out_w = tbl_w * idx_w; + jit::emb_seq_pool_attr_t attr(tbl_h, tbl_w, idx_h, idx_w, out_w, + type); + Tensor idx, out; + idx.Resize({idx_h, idx_w}); + out.Resize({out_w}); + RandomVec(idx_h * idx_w, + idx.mutable_data(PlaceType()), 0, + tbl_h - 1); + const int64_t* idx_data = idx.data(); + T* o_data = out.mutable_data(PlaceType()); + BenchAllImpls, PlaceType>( + attr, table_data, idx_data, o_data, &attr); + } + } + } + } +} + template void BenchMatMulKernel() { for (int m : {1, 2, 3, 4}) { @@ -441,6 +472,11 @@ BENCH_FP32_CPU(kGRUHtPart2) { BenchGRUKernel(); } // seq pool function BENCH_FP32_CPU(kSeqPool) { BenchSeqPoolKernel(); } +// embedding seq pool function +BENCH_FP32_CPU(kEmbSeqPool) { + BenchEmbSeqPoolKernel(); +} + // matmul BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel(); } diff --git a/paddle/fluid/operators/jit/gen/CMakeLists.txt b/paddle/fluid/operators/jit/gen/CMakeLists.txt index efc7eb79d36c5cf9fac4ac40db4e2e28cb242e22..294f73d9646c93132e464a032e93562094663a73 100644 --- a/paddle/fluid/operators/jit/gen/CMakeLists.txt +++ b/paddle/fluid/operators/jit/gen/CMakeLists.txt @@ -31,3 +31,4 @@ USE_JITKERNEL_GEN(kNCHW16CMulNC) USE_JITKERNEL_GEN(kSeqPool) USE_JITKERNEL_GEN(kHMax) USE_JITKERNEL_GEN(kHSum) +USE_JITKERNEL_GEN(kEmbSeqPool) diff --git a/paddle/fluid/operators/jit/gen/embseqpool.cc b/paddle/fluid/operators/jit/gen/embseqpool.cc new file mode 100644 index 0000000000000000000000000000000000000000..23837a3fb9886ae8a839d4b31bd57916168ea53c --- /dev/null +++ b/paddle/fluid/operators/jit/gen/embseqpool.cc @@ -0,0 +1,149 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#include "paddle/fluid/operators/jit/gen/embseqpool.h" +#include // offsetof +#include +#include "paddle/fluid/operators/jit/gen/act.h" // for exp_float_consts ones +#include "paddle/fluid/operators/jit/registry.h" +#include "paddle/fluid/platform/cpu_info.h" + +namespace paddle { +namespace operators { +namespace jit { +namespace gen { + +void EmbSeqPoolJitCode::genCode() { + preCode(); + constexpr int block = YMM_FLOAT_BLOCK; + constexpr int max_num_regs = 8; + const int num_block = tbl_w_ / block; + const int num_groups = num_block / max_num_regs; + const size_t block_size = sizeof(float) * block; + std::vector groups(num_groups, max_num_regs); + int rest_num_regs = num_block % max_num_regs; + if (rest_num_regs > 0) { + groups.push_back(rest_num_regs); + } + + // protect param_dst + mov(reg_ptr_param_dst, param_dst); + mov(reg_idx_width_in_byte, + qword[param_attr + offsetof(emb_seq_pool_attr_t, index_width)]); + mov(reg_idx_height, + qword[param_attr + offsetof(emb_seq_pool_attr_t, index_height)]); + mov(rax, sizeof(int64_t)); + mul(reg_idx_width_in_byte); + mov(reg_idx_width_in_byte, rax); + const size_t tbl_width_in_byte = sizeof(float) * tbl_w_; + int acc_num_regs = 0; + for (int num_regs : groups) { + Label l_next_idx_w, l_next_idx_h, l_save_now; + xor_(reg_idx_w_i_in_byte, reg_idx_w_i_in_byte); + mov(reg_ptr_dst_i, reg_ptr_param_dst); + add(reg_ptr_dst_i, acc_num_regs * block_size); + + L(l_next_idx_w); + { + // h == 0 + mov(reg_ptr_idx_i, param_idx); + add(reg_ptr_idx_i, reg_idx_w_i_in_byte); + mov(reg_idx, qword[reg_ptr_idx_i]); + mov(rax, tbl_width_in_byte); + mul(reg_idx); + mov(reg_ptr_tbl_i, rax); // reg is offset now + add(reg_ptr_tbl_i, param_tbl); // reg is ptr_i now + size_t w_offset = 0; + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmovups(ymm_t(reg_i + num_regs), ptr[reg_ptr_tbl_i + w_offset]); + w_offset += block_size; + } + add(reg_ptr_idx_i, reg_idx_width_in_byte); + + // end condition of idx h + mov(reg_idx_h_end, reg_idx_height); + mov(rax, reg_idx_width_in_byte); + mul(reg_idx_h_end); + mov(reg_idx_h_end, rax); + add(reg_idx_h_end, reg_idx_w_i_in_byte); + add(reg_idx_h_end, param_idx); + + cmp(reg_ptr_idx_i, reg_idx_h_end); + jge(l_save_now, T_NEAR); + L(l_next_idx_h); + { + mov(reg_idx, qword[reg_ptr_idx_i]); + mov(reg_ptr_tbl_i, reg_idx); + mov(rax, tbl_width_in_byte); + mul(reg_idx); + mov(reg_ptr_tbl_i, rax); + add(reg_ptr_tbl_i, param_tbl); + size_t w_offset = 0; + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmovups(ymm_t(reg_i), ptr[reg_ptr_tbl_i + w_offset]); + vaddps(ymm_t(reg_i + num_regs), ymm_t(reg_i + num_regs), + ymm_t(reg_i)); + w_offset += block_size; + } + add(reg_ptr_idx_i, reg_idx_width_in_byte); + cmp(reg_ptr_idx_i, reg_idx_h_end); + jl(l_next_idx_h, T_NEAR); + } // end of idx h + L(l_save_now); + // avg or sqrt here, if needed + w_offset = 0; + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmovups(ptr[reg_ptr_dst_i + w_offset], ymm_t(reg_i + num_regs)); + w_offset += block_size; + } + add(reg_ptr_dst_i, tbl_width_in_byte); + add(reg_idx_w_i_in_byte, sizeof(int64_t)); + cmp(reg_idx_w_i_in_byte, reg_idx_width_in_byte); + jl(l_next_idx_w, T_NEAR); + } // end of idx w + + acc_num_regs += num_regs; + add(param_tbl, num_regs * block_size); // do not use acc_num_regs + } // end of groups + postCode(); +} + +class EmbSeqPoolCreator : public JitCodeCreator { + public: + bool UseMe(const emb_seq_pool_attr_t& attr) const override { + return platform::MayIUse(platform::avx) && + attr.table_width % YMM_FLOAT_BLOCK == 0; + } + size_t CodeSize(const emb_seq_pool_attr_t& attr) const override { + return 96 + (attr.table_width / YMM_FLOAT_BLOCK) * 96 * 8; + } + std::unique_ptr CreateJitCode( + const emb_seq_pool_attr_t& attr) const override { + PADDLE_ENFORCE_GT(attr.table_height, 0); + PADDLE_ENFORCE_GT(attr.table_width, 0); + PADDLE_ENFORCE_GT(attr.index_height, 0); + PADDLE_ENFORCE_GT(attr.index_width, 0); + PADDLE_ENFORCE_GT(attr.out_width, 0); + return make_unique(attr, CodeSize(attr)); + } +}; + +} // namespace gen +} // namespace jit +} // namespace operators +} // namespace paddle + +namespace gen = paddle::operators::jit::gen; + +REGISTER_JITKERNEL_GEN(kEmbSeqPool, gen::EmbSeqPoolCreator); diff --git a/paddle/fluid/operators/jit/gen/embseqpool.h b/paddle/fluid/operators/jit/gen/embseqpool.h new file mode 100644 index 0000000000000000000000000000000000000000..5afcfbdc1786bef160864fcde06f8738207751be --- /dev/null +++ b/paddle/fluid/operators/jit/gen/embseqpool.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#pragma once + +#include +#include "glog/logging.h" +#include "paddle/fluid/operators/jit/gen/jitcode.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace operators { +namespace jit { +namespace gen { + +class EmbSeqPoolJitCode : public JitCode { + public: + explicit EmbSeqPoolJitCode(const emb_seq_pool_attr_t& attr, + size_t code_size = 256 * 1024, + void* code_ptr = nullptr) + : JitCode(code_size, code_ptr), + tbl_w_(attr.table_width), + type_(attr.pool_type) { + if (type_ != SeqPoolType::kSum) { + LOG(FATAL) << "Only support sum pool yet "; + } + this->genCode(); + } + + std::string name() const override { + std::string base = "EmbSeqPoolJitCode"; + if (type_ == SeqPoolType::kSum) { + base += "_Sum"; + } else if (type_ == SeqPoolType::kAvg) { + base += "_Avg"; + } else if (type_ == SeqPoolType::kSqrt) { + base += "_Sqrt"; + } + base += ("_W" + std::to_string(tbl_w_)); + return base; + } + void genCode() override; + + private: + int tbl_w_; + SeqPoolType type_; + reg64_t param_tbl{abi_param1}; + reg64_t param_idx{abi_param2}; + reg64_t param_dst{abi_param3}; + reg64_t param_attr{abi_param4}; + + reg64_t reg_tmp{rax}; + + reg64_t reg_idx_width_in_byte{r8}; + reg64_t reg_idx_height{r9}; + + reg64_t reg_ptr_tbl_i{r10}; + reg64_t reg_idx{r10}; // could use same of reg_ptr_tbl_i + reg64_t reg_ptr_idx_i{r11}; + reg64_t reg_ptr_dst_i{r12}; + reg64_t reg_ptr_param_dst{r13}; // rdx is used in mul so protect param_dst + + reg64_t reg_idx_w_i_in_byte{r14}; + reg64_t reg_idx_h_end{r15}; +}; + +} // namespace gen +} // namespace jit +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/jit/gen/seqpool.h b/paddle/fluid/operators/jit/gen/seqpool.h index 4108ee2f46433f6dc846cbdd3a8f8f9b15cc0c67..e909bc7c7939ee5cb7a2d367c7a452b96e6a91c2 100644 --- a/paddle/fluid/operators/jit/gen/seqpool.h +++ b/paddle/fluid/operators/jit/gen/seqpool.h @@ -32,7 +32,7 @@ class SeqPoolJitCode : public JitCode { : JitCode(code_size, code_ptr), w_(attr.w), type_(attr.type) { if (!(type_ == SeqPoolType::kSum || type_ == SeqPoolType::kAvg || type_ == SeqPoolType::kSqrt)) { - LOG(FATAL) << "Only support sum pool yet "; + LOG(FATAL) << "Only supported pool type: sum, avg and sqrt."; } fp_h_[0] = 1.f; this->genCode(); diff --git a/paddle/fluid/operators/jit/helper.cc b/paddle/fluid/operators/jit/helper.cc index e7292fe2bd8031aa5bbff68e7c2305a238085bf1..a76653613289892c4bb41596f998c5f4cc131fd7 100644 --- a/paddle/fluid/operators/jit/helper.cc +++ b/paddle/fluid/operators/jit/helper.cc @@ -54,6 +54,7 @@ const char* to_string(KernelType kt) { ONE_CASE(kHMax); ONE_CASE(kHSum); ONE_CASE(kSoftmax); + ONE_CASE(kEmbSeqPool); default: PADDLE_THROW("Not support type: %d, or forget to add it.", kt); return "NOT JITKernel"; diff --git a/paddle/fluid/operators/jit/helper.h b/paddle/fluid/operators/jit/helper.h index d5773d65940127ea0a9b77ed2760bd371b778f4c..07998588a5a560f9c2ad7cc765b66e76e87da6f6 100644 --- a/paddle/fluid/operators/jit/helper.h +++ b/paddle/fluid/operators/jit/helper.h @@ -172,6 +172,15 @@ inline std::ostream& operator<<(std::ostream& os, const seq_pool_attr_t& attr) { return os; } +inline std::ostream& operator<<(std::ostream& os, + const emb_seq_pool_attr_t& attr) { + os << "table_height[" << attr.table_height << "],table_width[" + << attr.table_width << "],index_height[" << attr.index_height + << "],index_width[" << attr.index_width << "],output_width[" + << attr.out_width << "],pool_type[" << to_string(attr.pool_type) << "]"; + return os; +} + inline std::ostream& operator<<(std::ostream& os, const matmul_attr_t& attr) { os << "M[" << attr.m << "],N[" << attr.n << "],K[" << attr.k << "]"; return os; diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h index 4a8f61146a1921fa1d5f6b7e15af40cd45d31a22..20b6a32bef9860c52ab4423395a8e00f719b0210 100644 --- a/paddle/fluid/operators/jit/kernel_base.h +++ b/paddle/fluid/operators/jit/kernel_base.h @@ -13,6 +13,7 @@ * limitations under the License. */ #pragma once +#include #include "paddle/fluid/operators/jit/macro.h" #include "paddle/fluid/platform/macros.h" @@ -20,34 +21,35 @@ namespace paddle { namespace operators { namespace jit { -// TODO(TJ): reorder by alphabet typedef enum { kNone = 0, - kVMul = 1, - kVAdd = 2, - kVAddRelu, - kVSub, - kVScal, - kVAddBias, - kVRelu, - kVIdentity, - kVSquare, - kVExp, - kVSigmoid, - kVTanh, - kLSTMCtHt, - kLSTMC1H1, + // sort by alphabet + kCRFDecoding = 1, + kEmbSeqPool = 2, kGRUH1, kGRUHtPart1, kGRUHtPart2, - kCRFDecoding, + kHSum, // horizontal max + kHMax, // horizontal sum + kLSTMCtHt, + kLSTMC1H1, kLayerNorm, + kMatMul, kNCHW16CMulNC, kSeqPool, - kMatMul, - kHSum, // horizontal max - kHMax, // horizontal sum kSoftmax, + kVAdd, + kVAddBias, + kVAddRelu, + kVExp, + kVIdentity, + kVMul, + kVRelu, + kVScal, + kVSigmoid, + kVSquare, + kVSub, + kVTanh, } KernelType; typedef enum { @@ -145,6 +147,32 @@ struct SeqPoolTuples { typedef void (*func_type)(const T*, T*, const seq_pool_attr_t*); }; +typedef struct emb_seq_pool_attr_s { + int64_t table_height, table_width; + int64_t index_height, index_width; + int64_t out_width; + SeqPoolType pool_type; + emb_seq_pool_attr_s() = default; + explicit emb_seq_pool_attr_s(int64_t tbl_height, int64_t tbl_width, + int64_t idx_height, int64_t idx_width, + int64_t output_width, + SeqPoolType seqpool_type = SeqPoolType::kSum) + : table_height(tbl_height), + table_width(tbl_width), + index_height(idx_height), + index_width(idx_width), + out_width(output_width), + pool_type(seqpool_type) {} +} emb_seq_pool_attr_t; + +template +struct EmbSeqPoolTuples { + typedef T data_type; + typedef emb_seq_pool_attr_t attr_type; + typedef void (*func_type)(const T*, const int64_t*, T*, + const emb_seq_pool_attr_t*); +}; + typedef struct matmul_attr_s { int m, n, k; void* packed_weight{nullptr}; diff --git a/paddle/fluid/operators/jit/kernel_key.cc b/paddle/fluid/operators/jit/kernel_key.cc index 1e4a8884e78c5d3c1748988f05ecf461a6f0eb94..e659c6d254391f09ac8692e0b7602c65e1afd47d 100644 --- a/paddle/fluid/operators/jit/kernel_key.cc +++ b/paddle/fluid/operators/jit/kernel_key.cc @@ -56,6 +56,11 @@ size_t JitCodeKey(const matmul_attr_t& attr) { return (key << shift * 2) + ((static_cast(attr.n)) << shift) + attr.k; } +template <> +size_t JitCodeKey(const emb_seq_pool_attr_t& attr) { + return attr.table_width; +} + } // namespace jit } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt index f9e5aea32e7cd48e9b39c4c3ee0e30f4a5c84f6f..d209f31007255b3a90fdeeb4d609311b80bdc7b5 100644 --- a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt +++ b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt @@ -13,3 +13,4 @@ USE_JITKERNEL_MORE(kVSigmoid, mkl) USE_JITKERNEL_MORE(kVTanh, mkl) USE_JITKERNEL_MORE(kSeqPool, mkl) USE_JITKERNEL_MORE(kSoftmax, mkl) +USE_JITKERNEL_MORE(kEmbSeqPool, mkl) diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.cc b/paddle/fluid/operators/jit/more/mkl/mkl.cc index 4c999131ab116ebe3484355158993558b02cc4b2..29a451f832fa745f8e1f5a45fd934f09e1f41e76 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.cc +++ b/paddle/fluid/operators/jit/more/mkl/mkl.cc @@ -174,6 +174,16 @@ bool SeqPoolKernel::UseMe(const seq_pool_attr_t& attr) const { return true; } +template <> +bool EmbSeqPoolKernel::UseMe(const emb_seq_pool_attr_t& attr) const { + return true; +} + +template <> +bool EmbSeqPoolKernel::UseMe(const emb_seq_pool_attr_t& attr) const { + return true; +} + template <> bool MatMulKernel::UseMe(const matmul_attr_t& attr) const { return platform::MayIUse(platform::avx); @@ -227,6 +237,7 @@ REGISTER_MKL_KERNEL(kVSquare, VSquare); REGISTER_MKL_KERNEL(kVSigmoid, VSigmoid); REGISTER_MKL_KERNEL(kVTanh, VTanh); REGISTER_MKL_KERNEL(kSeqPool, SeqPool); +REGISTER_MKL_KERNEL(kEmbSeqPool, EmbSeqPool); REGISTER_MKL_KERNEL(kSoftmax, Softmax); #undef REGISTER_MKL_KERNEL diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h index 8130b87326f1887f232022ab30fa7bf42b0723e7..9a72ba83022de2beeb760772ee8489477befdd7e 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.h +++ b/paddle/fluid/operators/jit/more/mkl/mkl.h @@ -18,6 +18,7 @@ #include #include #include "paddle/fluid/operators/jit/kernel_base.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace operators { @@ -91,6 +92,32 @@ void SeqPool(const T* x, T* y, const seq_pool_attr_t* attr) { } } +template +void EmbSeqPool(const T* table, const int64_t* idx, T* out, + const emb_seq_pool_attr_t* attr) { + PADDLE_ENFORCE_EQ(attr->table_width * attr->index_width, attr->out_width); + auto check_idx_value_valid = [&](int64_t i) { + PADDLE_ENFORCE_LT(idx[i], attr->table_height, "idx value: %d, i: %d", + idx[i], i); + PADDLE_ENFORCE_GE(idx[i], 0, "idx value: %d, i: %d", idx[i], i); + }; + + for (int64_t w = 0; w != attr->index_width; ++w) { + check_idx_value_valid(w); + VCopy(table + idx[w] * attr->table_width, out + w * attr->table_width, + attr->table_width); + } + + for (int64_t h = 1; h < attr->index_height; ++h) { + for (int64_t w = 0; w < attr->index_width; ++w) { + int64_t i = h * attr->index_width + w; + check_idx_value_valid(i); + VAXPY(static_cast(1), table + idx[i] * attr->table_width, + out + w * attr->table_width, attr->table_width); + } + } +} + template void ASum(const T* x, T* res, int n); @@ -142,6 +169,8 @@ DECLARE_MKL_KERNEL(VSquare, XYNTuples); DECLARE_MKL_KERNEL(SeqPool, SeqPoolTuples); +DECLARE_MKL_KERNEL(EmbSeqPool, EmbSeqPoolTuples); + DECLARE_MKL_KERNEL(Softmax, SoftmaxTuples); #undef DECLARE_MKL_KERNEL diff --git a/paddle/fluid/operators/jit/refer/CMakeLists.txt b/paddle/fluid/operators/jit/refer/CMakeLists.txt index 9f2935828ca300dbdb71b0fefb6b9883cb45e4b0..218d801c084be455538628d1c1028d8e52142894 100644 --- a/paddle/fluid/operators/jit/refer/CMakeLists.txt +++ b/paddle/fluid/operators/jit/refer/CMakeLists.txt @@ -32,3 +32,4 @@ USE_JITKERNEL_REFER(kVSquare) USE_JITKERNEL_REFER(kHSum) USE_JITKERNEL_REFER(kHMax) USE_JITKERNEL_REFER(kSoftmax) +USE_JITKERNEL_REFER(kEmbSeqPool) diff --git a/paddle/fluid/operators/jit/refer/refer.cc b/paddle/fluid/operators/jit/refer/refer.cc index b8adb40ec7e1b64df2b04a3201292db235af7b19..7e7dd6960b66e4e2f77eca6e96604f2a86553120 100644 --- a/paddle/fluid/operators/jit/refer/refer.cc +++ b/paddle/fluid/operators/jit/refer/refer.cc @@ -57,4 +57,6 @@ REGISTER_REFER_KERNEL(kHSum, HSum); REGISTER_REFER_KERNEL(kSoftmax, Softmax); +REGISTER_REFER_KERNEL(kEmbSeqPool, EmbSeqPool); + #undef REGISTER_REFER_KERNEL diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h index 0c4a985f8e8ece0a6169478fa3a9b111f5a6f3b4..fd1193aa41e50e3ede7f61588dc72389279bb95d 100644 --- a/paddle/fluid/operators/jit/refer/refer.h +++ b/paddle/fluid/operators/jit/refer/refer.h @@ -16,6 +16,7 @@ #include #include +#include #include "paddle/fluid/operators/jit/helper.h" #include "paddle/fluid/operators/jit/kernel_base.h" #include "paddle/fluid/platform/enforce.h" @@ -414,6 +415,37 @@ void Softmax(const T* x, T* y, int n, int bs = 1) { } } +// embedding seq pool +// table is a matrix with (tbl_h, tbl_w) +// idx is a matrix with (idx_h, idx_w) +// output is a vector with length tbl_w * idx_w +template +void EmbSeqPool(const T* table, const int64_t* idx, T* out, + const emb_seq_pool_attr_t* attr) { + PADDLE_ENFORCE_EQ(attr->table_width * attr->index_width, attr->out_width); + + auto check_idx_value_valid = [&](int64_t i) { + PADDLE_ENFORCE_LT(idx[i], attr->table_height, "idx value: %d, i: %d", + idx[i], i); + PADDLE_ENFORCE_GE(idx[i], 0, "idx value: %d, i: %d", idx[i], i); + }; + + for (int64_t w = 0; w != attr->index_width; ++w) { + check_idx_value_valid(w); + std::memcpy(out + w * attr->table_width, table + idx[w] * attr->table_width, + attr->table_width * sizeof(T)); + } + + for (int64_t h = 1; h < attr->index_height; ++h) { + for (int64_t w = 0; w < attr->index_width; ++w) { + int64_t i = h * attr->index_width + w; + check_idx_value_valid(i); + VAdd(table + idx[i] * attr->table_width, out + w * attr->table_width, + out + w * attr->table_width, attr->table_width); + } + } +} + #define DECLARE_REFER_KERNEL(name, tuples) \ template \ class name##Kernel : public ReferKernel> { \ @@ -462,6 +494,8 @@ DECLARE_REFER_KERNEL(HSum, XRNTuples); DECLARE_REFER_KERNEL(Softmax, SoftmaxTuples); +DECLARE_REFER_KERNEL(EmbSeqPool, EmbSeqPoolTuples); + #undef DECLARE_REFER_KERNEL } // namespace refer diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index 85b50b79d95070a56da384d88c356294b7ed9f9f..2632bfb6de1751982c5c836fc96bf57d5b2844d6 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -270,6 +270,32 @@ struct TestFuncWithRefer, std::vector, std::vector, } }; +template +struct TestFuncWithRefer, std::vector, + std::vector, std::vector, + typename jit::EmbSeqPoolTuples::attr_type> { + void operator()(const typename jit::EmbSeqPoolTuples::func_type tgt, + const std::vector& table, const std::vector& idx, + const std::vector& oref, + const typename jit::EmbSeqPoolTuples::attr_type& attr) { + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(table.size(), + static_cast(attr.table_height * attr.table_width)); + EXPECT_EQ(idx.size(), + static_cast(attr.index_height * attr.index_width)); + EXPECT_EQ(oref.size(), + static_cast(attr.table_width * attr.index_width)); + const T* table_data = table.data(); + const int64_t* idx_data = idx.data(); + const T* oref_data = oref.data(); + int o_w = oref.size(); + std::vector out(o_w); + T* o_data = out.data(); + tgt(table_data, idx_data, o_data, &attr); + ExpectEQ(o_data, oref_data, o_w); + } +}; + template struct TestFuncWithRefer, std::vector, std::vector, std::vector, @@ -644,6 +670,40 @@ void TestSoftmaxKernel() { } } +template +void TestEmbSeqPoolKernel() { + VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); + int64_t tbl_h = 1e4; + std::vector pool_types = { + jit::SeqPoolType::kSum}; // only support sum yet + for (int tbl_w : TestSizes()) { + std::vector table(tbl_h * tbl_w); + RandomVec(tbl_h * tbl_w, table.data(), -2.f, 2.f); + const T* table_data = table.data(); + for (auto type : pool_types) { + for (int idx_w : {1, 2, 10, 16}) { + for (int idx_h : {1, 2, 9, 13, 16}) { + auto ref = jit::GetRefer>(); + EXPECT_TRUE(ref != nullptr); + std::vector idx(idx_h * idx_w); + RandomVec(idx_h * idx_w, idx.data(), 0, tbl_h - 1); + int64_t out_w = tbl_w * idx_w; + std::vector oref(out_w); + const int64_t* idx_data = idx.data(); + T* o_data = oref.data(); + jit::emb_seq_pool_attr_t attr(tbl_h, tbl_w, idx_h, idx_w, out_w, + type); + ref(table_data, idx_data, o_data, &attr); + + TestAllImpls, PlaceType, std::vector, + std::vector, std::vector>(attr, table, idx, + oref, attr); + } + } + } + } +} + template void TestNCHW16CMulNCKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); @@ -878,6 +938,11 @@ TEST(JITKernel, kSoftmax) { TestSoftmaxKernel(); } +TEST(JITKernel, kEmbSeqPool) { + TestEmbSeqPoolKernel(); + TestEmbSeqPoolKernel(); +} + TEST(JITKernel, kNCHW16CMulNC) { TestNCHW16CMulNCKernel(); TestNCHW16CMulNCKernel(); diff --git a/paddle/fluid/operators/ngraph/ngraph_bridge.cc b/paddle/fluid/operators/ngraph/ngraph_bridge.cc index 08d72a5b3978097f4d3dca2e38bef2c3d89cfdc8..36a2efc0ce113e7ca47243d7a1dee1ca998edd03 100644 --- a/paddle/fluid/operators/ngraph/ngraph_bridge.cc +++ b/paddle/fluid/operators/ngraph/ngraph_bridge.cc @@ -36,6 +36,8 @@ std::map("epsilon"); const float momentum = op_attrs.Get("momentum"); + PADDLE_ENFORCE( + data_layout == "NHWC" || data_layout == "NCHW" || data_layout == "NC", + "The BatchNorm operator only supports NHWC/NCHW/NC data format"); + if (data_layout == "NHWC") { x = paddle::platform::Nhwc2Nchw(x); } @@ -110,6 +114,9 @@ void BuildBatchNormGradNode( "BN grap input size needs to be 2 or 4"); PADDLE_ENFORCE_EQ(x_shape.size(), dy_shape.size(), "BN grap input and delta size needs to be equal"); + PADDLE_ENFORCE( + data_layout == "NHWC" || data_layout == "NCHW" || data_layout == "NC", + "The BatchNorm operator only supports NHWC/NCHW/NC data format"); if (x_shape.size() == 2) { x = std::make_shared( diff --git a/paddle/fluid/operators/ngraph/ops/cross_entropy_op.h b/paddle/fluid/operators/ngraph/ops/cross_entropy_op.h new file mode 100644 index 0000000000000000000000000000000000000000..f88a2cb94103b2305875655cefaec16263b4bf4f --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/cross_entropy_op.h @@ -0,0 +1,145 @@ +/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include + +#include "ngraph/ngraph.hpp" +#include "paddle/fluid/platform/ngraph_helper.h" + +namespace paddle { +namespace operators { +namespace ngraphs { + +void BuildCrossEntropyNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map); + auto label = paddle::platform::GetInputNode(op, "Label", ngb_node_map); + auto label_shape = label->get_shape(); + auto x_shape = x->get_shape(); + auto label_rank = label_shape.size(); + auto x_rank = x_shape.size(); + std::shared_ptr x_2d = x, label_2d = label; + auto label_2d_shape = label_shape, x_2d_shape = x_shape; + + if (label_rank > 2) { + label_2d_shape = paddle::platform::FlattenTo2d(label_shape, label_rank - 1); + label_2d = paddle::platform::NgReshaper(label, label_2d_shape); + } + if (x_rank > 2) { + x_2d_shape = paddle::platform::FlattenTo2d(x_shape, x_rank - 1); + x_2d = paddle::platform::NgReshaper(x, x_2d_shape); + } + + auto batch_size = x_2d_shape.at(0); + auto op_attrs = paddle::framework::AttrReader(op->Attrs()); + const bool is_soft_label = op_attrs.Get("soft_label"); + + std::shared_ptr node_1_hot = label_2d; + if (!is_soft_label) { + auto label_1d = paddle::platform::NgReshaper( + label_2d, ngraph::Shape{label_2d_shape.at(0)}); + node_1_hot = std::make_shared(label_1d, x_2d_shape, 1); + } + if (x->get_element_type() != node_1_hot->get_element_type()) { + node_1_hot = std::make_shared(node_1_hot, + x->get_element_type()); + } + + auto node_log = std::make_shared(x_2d); + auto high_clip = ngraph::op::Constant::create(node_log->get_element_type(), + node_log->get_shape(), {1e20}); + auto low_clip = ngraph::op::Constant::create(node_log->get_element_type(), + node_log->get_shape(), {-1e20}); + auto node_min = std::make_shared(node_log, high_clip); + auto node_max = std::make_shared(node_min, low_clip); + auto node_mul = node_1_hot * node_log; + auto node_sum = + std::make_shared(node_mul, ngraph::AxisSet{1}); + auto node_neg = std::make_shared(node_sum); + auto xe = + paddle::platform::NgReshaper(node_neg, ngraph::Shape{batch_size, 1}); + + if (!is_soft_label) { + auto ignore_index = op_attrs.Get("ignore_index"); + auto ignore_node = ngraph::op::Constant::create( + label->get_element_type(), label_2d_shape, {ignore_index}); + auto not_equal_node = + std::make_shared(label_2d, ignore_node); + auto mask = std::make_shared(not_equal_node, + xe->get_element_type()); + xe = xe * mask; + } + + paddle::platform::SetOutputNode(op, "Y", xe, ngb_node_map); +} + +void BuildCrossEntropyGradNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto op_attrs = paddle::framework::AttrReader(op->Attrs()); + const bool is_soft_label = op_attrs.Get("soft_label"); + + auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map); + auto label = paddle::platform::GetInputNode(op, "Label", ngb_node_map); + auto dy = paddle::platform::GetInputNode(op, "Y@GRAD", ngb_node_map); + auto x_shape = x->get_shape(); + auto rank = x_shape.size(); + + std::shared_ptr mask; + if (!is_soft_label) { + auto label_shape = label->get_shape(); + label_shape.pop_back(); + label = paddle::platform::NgReshaper(label, label_shape); + + auto ignore_index = op_attrs.Get("ignore_index"); + auto ignore_node = ngraph::op::Constant::create( + label->get_element_type(), label_shape, {ignore_index}); + auto not_equal_node = + std::make_shared(label, ignore_node); + mask = std::make_shared(not_equal_node, + x->get_element_type()); + mask = std::make_shared(mask, x_shape, + ngraph::AxisSet{rank - 1}); + + label = std::make_shared(label, x_shape, rank - 1); + } + + auto dy_shape = dy->get_shape(); + dy_shape.pop_back(); + auto dy_reshape = paddle::platform::NgReshaper(dy, dy_shape); + auto dy_bcast = std::make_shared( + dy_reshape, x_shape, ngraph::AxisSet{rank - 1}); + if (x->get_element_type() != label->get_element_type()) { + label = std::make_shared(label, x->get_element_type()); + } + + auto xe_grad = -label * dy_bcast / x; + + if (!is_soft_label) { + xe_grad = xe_grad * mask; + } + + paddle::platform::SetOutputNode(op, "X@GRAD", xe_grad, ngb_node_map); +} +} // namespace ngraphs +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/ngraph/ops/fill_constant_op.h b/paddle/fluid/operators/ngraph/ops/fill_constant_op.h index 406a4314f89810df192280cc97de245553d5520f..58783bc220fa02879aaebacc5ac55a07b13184f1 100644 --- a/paddle/fluid/operators/ngraph/ops/fill_constant_op.h +++ b/paddle/fluid/operators/ngraph/ops/fill_constant_op.h @@ -46,8 +46,6 @@ void BuildFillConstantNode( ng_dtype = ngraph::element::i64; } else if (data_type == paddle::framework::proto::VarType::INT32) { ng_dtype = ngraph::element::i32; - } else if (data_type == paddle::framework::proto::VarType::BOOL) { - ng_dtype = ngraph::element::boolean; } else { PADDLE_THROW("unsupported data type: %s", data_type); } diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index a4a01ad647b038bd2bfea00fefa30abb19f58b66..c50c38160e0a2daca704a52eceaa2a9a67a9ba9b 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -373,7 +373,13 @@ PYBIND11_MODULE(core, m) { PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()), "the provided lod info is invalid"); self.set_lod(new_lod); - }) + }, + py::arg("lod"), R"DOC( + Set LoD of the LoDTensor. + + Args: + lod (List[List[int]]): the lod to be set. + )DOC") .def("set_recursive_sequence_lengths", [](LoDTensor &self, const std::vector> &recursive_sequence_lengths) { @@ -389,7 +395,17 @@ PYBIND11_MODULE(core, m) { CheckLoD(new_offset_lod, vectorize(self.dims()).front()), "the provided recursive_sequence_lengths info is invalid"); self.set_lod(new_offset_lod); - }) + }, + py::arg("recursive_sequence_lengths"), R"DOC( + Set LoD of the LoDTensor according to recursive sequence length. + + For example, if recursive_sequence_lengths=[[2, 3]], meaning that + there are two sequences with length 2 and 3 respectively, the + corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]]. + + Args: + recursive_sequence_lengths (List[List[int]]): sequence lengths. + )DOC") .def("lod", [](LoDTensor &self) -> std::vector> { // output the offset-based lod info @@ -398,7 +414,13 @@ PYBIND11_MODULE(core, m) { new_lod.reserve(lod.size()); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); return new_lod; - }) + }, + R"DOC( + Return the LoD of the LoDTensor. + + Returns: + out (List[List[int]]): the lod of the LoDTensor. + )DOC") // Set above comments of set_lod. .def("recursive_sequence_lengths", [](LoDTensor &self) -> std::vector> { @@ -408,12 +430,25 @@ PYBIND11_MODULE(core, m) { new_lod.reserve(lod.size()); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); return new_lod; - }) - .def("has_valid_recursive_sequence_lengths", [](LoDTensor &self) -> bool { - // Check that the lod info is valid and match the outermost - // dimension of the LoDTensor data - return CheckLoD(self.lod(), vectorize(self.dims()).front()); - }); + }, + R"DOC( + Return the sequence length of the LoDTensor corresponding to LoD. + + Returns: + out (List[List[int]): the sequence lengths. + )DOC") + .def("has_valid_recursive_sequence_lengths", + [](LoDTensor &self) -> bool { + // Check that the lod info is valid and match the outermost + // dimension of the LoDTensor data + return CheckLoD(self.lod(), vectorize(self.dims()).front()); + }, + R"DOC( + Check whether the lod of the LoDTensor is valid. + + Returns: + out (bool): whether the lod is valid. + )DOC"); py::class_(m, "SelectedRows") .def("__init__", @@ -549,11 +584,45 @@ All parameter, weight, gradient are variables in Paddle. [](Scope &self, const std::string &name) -> Variable * { return self.Var(name); }, + py::arg("name"), + R"DOC( + Find or create variable named :code:`name` in the current scope. + + If the variable named :code:`name` does not exist in the + current scope, the variable would be created. Otherwise, + return the existing variable. + + Args: + name (str): the variable name. + + Returns: + out (core.Variable): the found or created variable. + )DOC", + py::return_value_policy::reference) + .def("find_var", &Scope::FindVar, py::arg("name"), + R"DOC( + Find variable named :code:`name` in the current scope or + its parent scope. Return None if not found. + + Args: + name (str): the variable name. + + Returns: + out (core.Variable|None): the found variable or None. + )DOC", py::return_value_policy::reference) - .def("find_var", &Scope::FindVar, py::return_value_policy::reference) .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); }, + R"DOC( + Create a new sub-scope of the current scope. + + Returns: + out (core._Scope): the created sub-scope. + )DOC", py::return_value_policy::reference) - .def("drop_kids", &Scope::DropKids); + .def("drop_kids", &Scope::DropKids, + R"DOC( + Delete all sub-scopes of the current scope. + )DOC"); m.def("Scope", []() -> Scope * { @@ -561,6 +630,12 @@ All parameter, weight, gradient are variables in Paddle. ScopePool::Instance().Insert(std::unique_ptr(s)); return s; }, + R"DOC( + Create a new scope. + + Returns: + out (core._Scope): the created scope. + )DOC", py::return_value_policy::reference); //! @note: Be careful! PyBind will return std::string as an unicode, not @@ -789,11 +864,13 @@ All parameter, weight, gradient are variables in Paddle. self[i].ShareDataWith(t); self[i].set_lod(t.lod()); }) - .def("append", [](LoDTensorArray &self, const LoDTensor &t) { - self.emplace_back(); - self.back().ShareDataWith(t); - self.back().set_lod(t.lod()); - }); + .def("append", + [](LoDTensorArray &self, const LoDTensor &t) { + self.emplace_back(); + self.back().ShareDataWith(t); + self.back().set_lod(t.lod()); + }, + py::arg("tensor"), "Append a LoDensor to LoDTensorArray."); m.def("IsInplace", [](std::string op) -> bool { return operators::IsInplace(op); }); diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 1135caf4f8c32901d93270d372fdaac702acf006..e7078499cae87b8a255c6c6d4774052e1f768aa7 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -88,6 +88,7 @@ function cmake_gen() { -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5m/ -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/lib/libpython3.5m.dylib" WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} + pip3.5 uninstall -y protobuf pip3.5 install --user -r ${PADDLE_ROOT}/python/requirements.txt else exit 1 @@ -101,6 +102,7 @@ function cmake_gen() { -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.6/include/python3.6m/ -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.6/lib/libpython3.6m.dylib" WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} + pip3.6 uninstall -y protobuf pip3.6 install --user -r ${PADDLE_ROOT}/python/requirements.txt else exit 1 @@ -114,6 +116,7 @@ function cmake_gen() { -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m/ -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib" WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} + pip3.7 uninstall -y protobuf pip3.7 install --user -r ${PADDLE_ROOT}/python/requirements.txt else exit 1 @@ -128,31 +131,44 @@ function cmake_gen() { PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7 -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so" + pip uninstall -y protobuf + pip install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp27-cp27mu" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:} export PATH=/opt/python/cp27-cp27mu/bin/:${PATH} PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7 -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so" + pip uninstall -y protobuf + pip install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp35-cp35m" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} export PATH=/opt/_internal/cpython-3.5.1/bin/:${PATH} export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.5.1/bin/python3 -DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.5.1/include/python3.5m -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.5.1/lib/libpython3.so" + pip3.5 uninstall -y protobuf + pip3.5 install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp36-cp36m" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-3.6.0/lib/:${LD_LIBRARY_PATH} export PATH=/opt/_internal/cpython-3.6.0/bin/:${PATH} export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.6.0/bin/python3 -DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.6.0/include/python3.6m -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.6.0/lib/libpython3.so" + pip3.6 uninstall -y protobuf + pip3.6 install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp37-cp37m" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} export PATH=/opt/_internal/cpython-3.7.0/bin/:${PATH} export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.7.0/bin/python3.7 -DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.7.0/include/python3.7m -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.7.0/lib/libpython3.so" + pip3.7 uninstall -y protobuf + pip3.7 install -r ${PADDLE_ROOT}/python/requirements.txt fi + else + pip uninstall -y protobuf + pip install -r ${PADDLE_ROOT}/python/requirements.txt fi fi diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py index ef0242942838fcca737a10fafbafa61bf520b532..b24cec044f1918ea9962dc10fc3939444bb340e8 100644 --- a/python/paddle/fluid/compiler.py +++ b/python/paddle/fluid/compiler.py @@ -177,7 +177,10 @@ class CompiledProgram(object): # FIXME(dzhwinter): enable_inplace should be after memory_optimize # if turn on python memory optimize, turn off the inplace_pass. - self._build_strategy.enable_inplace = False if self._program._is_mem_optimized else True + if self._build_strategy.memory_optimize is None: + self._build_strategy.memory_optimize = False if main._is_mem_optimized else True + if self._build_strategy.enable_inplace is None: + self._build_strategy.enable_inplace = False if main._is_mem_optimized else True if self._build_strategy.num_trainers > 1 and trainers_endpoints: assert self._build_strategy.num_trainers == len( diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index ef304b11106628f8541b348fb263274a0c4b31e9..15367c724e5304fed78ef58f8a27932e1d6de318 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -557,7 +557,8 @@ class OpProtoHolder(object): return { core.op_proto_and_checker_maker.kOpRoleAttrName(), core.op_proto_and_checker_maker.kOpRoleVarAttrName(), - core.op_proto_and_checker_maker.kOpNameScopeAttrName() + core.op_proto_and_checker_maker.kOpNameScopeAttrName(), + core.op_proto_and_checker_maker.kOpCreationCallstackAttrName() } diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 3a6753b01f152f61b78a9f04f4fe32136c051a19..539c9675b2d69b599fc63350c0c7c3b14e32995a 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -506,9 +506,9 @@ class While(object): while loop control flow. Args: - cond (Variable): condition used to compare. + cond(Variable): condition used to compare. is_test(bool): A flag indicating whether execution is in test phase. - name (str): The name of this layer. + name(str): The name of this layer. Examples: .. code-block:: python @@ -589,7 +589,8 @@ class While(object): def lod_rank_table(x, level=0): - """LoD Rank Table Operator. Given an input variable **x** and a level number + """ + LoD Rank Table Operator. Given an input variable **x** and a level number of LoD, this layer creates a LodRankTable object. A LoDRankTable object contains a list of bi-element tuples. Each tuple consists of an index and a length, both of which are int type. Refering to specified level of LoD, @@ -883,10 +884,8 @@ def less_than(x, y, force_cpu=None, cond=None, **ignored): return cond -def equal(x, y, cond=None, **ignored): +def equal(x, y, cond=None): """ - **equal** - This layer returns the truth value of :math:`x == y` elementwise. Args: @@ -1458,7 +1457,6 @@ class DynamicRNN(object): Returns: The current timestep in the input sequence. - """ self._assert_in_rnn_block_("step_input") if not isinstance(x, Variable): @@ -1535,8 +1533,7 @@ class DynamicRNN(object): @signature_safe_contextmanager def block(self): """ - The block for user to define operators in RNN. See the class docstring - for more details. + The block for user to define operators in RNN. """ if self.status != DynamicRNN.BEFORE_RNN: raise ValueError("rnn.block() can only be invoke once") @@ -1640,8 +1637,7 @@ class DynamicRNN(object): dtype(str|numpy.dtype): The data type of the initialized memory. Returns: - the memory variable. - + The memory variable. """ self._assert_in_rnn_block_('memory') self._init_zero_idx_() @@ -1740,7 +1736,7 @@ class DynamicRNN(object): def output(self, *outputs): """ - mark the RNN output variables. + Mark the RNN output variables. Args: outputs: The output variables. diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index b88be66906e806aeee55c1af6235a6fef9da7030..a9b391fd53a98dc05ee2d909a38dcf82cd5880ea 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -56,7 +56,10 @@ def data(name, Args: name(str): The name/alias of the function - shape(list): Tuple declaring the shape. + shape(list): Tuple declaring the shape. If :code:`append_batch_size` is + True and there is no -1 inside :code:`shape`, it should be + considered as the shape of the each sample. Otherwise, it + should be considered as the shape of the batched data. append_batch_size(bool): 1. If true, it prepends -1 to the shape. For example if shape=[1], the resulting shape is [-1, 1]. diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py index 09b1b30216b03e71253ca8da1d462db897e1a607..da6c24100452ba26896c8e7c06a76d874b3f51a2 100644 --- a/python/paddle/fluid/layers/layer_function_generator.py +++ b/python/paddle/fluid/layers/layer_function_generator.py @@ -24,7 +24,7 @@ from ..framework import OpProtoHolder, Variable, core, convert_np_dtype_to_dtype from ..layer_helper import LayerHelper __all__ = [ - 'deprecated', 'generate_layer_fn', 'generate_layer_fn_noattr', 'autodoc', + 'deprecated', 'generate_layer_fn', 'generate_activation_fn', 'autodoc', 'templatedoc' ] @@ -89,6 +89,9 @@ def _generate_doc_string_(op_proto, additional_args_lines=None): buf.write('\n') skip_attrs = OpProtoHolder.generated_op_attr_names() + # attr use_mkldnn and is_test also should not be visible to users. + skip_attrs.add("use_mkldnn") + skip_attrs.add("is_test") for each_attr in op_proto.attrs: if each_attr.name in skip_attrs: @@ -226,7 +229,7 @@ def generate_layer_fn(op_type): return func -def generate_layer_fn_noattr(op_type): +def generate_activation_fn(op_type): """Register the Python layer for an Operator without Attribute. Args: @@ -246,6 +249,7 @@ def generate_layer_fn_noattr(op_type): func.__name__ = op_type func.__doc__ = _generate_doc_string_(op_proto) + return func diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 3dcf9dc06998be9c38a48f18075cbf99f3dccb1a..6b4dc4ac89af43271ff4cbb51b02fe78af754a7b 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -14,7 +14,7 @@ from __future__ import print_function import os -from .layer_function_generator import generate_layer_fn, generate_layer_fn_noattr +from .layer_function_generator import generate_layer_fn, generate_activation_fn from .. import core from ..framework import convert_np_dtype_to_dtype_ @@ -53,7 +53,7 @@ globals()['_elementwise_div'] = generate_layer_fn('elementwise_div') __all__ += __activations_noattr__ for _OP in set(__activations_noattr__): - globals()[_OP] = generate_layer_fn_noattr(_OP) + globals()[_OP] = generate_activation_fn(_OP) __all__ += ["uniform_random"] diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index fbd04f1eb461268ae98ca74c0bc46cc2717733cb..fe2b3fbbd91f24881341003ad78af06e33772fd4 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -1368,9 +1368,9 @@ class FtrlOptimizer(Optimizer): Args: learning_rate (float|Variable): global learning rate. - l1 (float): - l2 (float): - lr_power (float): + l1 (float): L1 regularization strength. + l2 (float): L2 regularization strength. + lr_power (float): Learning Rate Power. regularization: A Regularizer, such as fluid.regularizer.L2DecayRegularizer. name: A optional name prefix. diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 22212ae9a216acaab3f295f1f8d091829a0aa471..8586670c2481a0f997e84f33860b6df28b3223ae 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -148,6 +148,8 @@ class ParallelExecutor(object): else framework.default_main_program() # FIXME(dzhwinter): enable_inplace should be after memory_optimize # if turn on python memory optimize, turn off the inplace_pass. + if build_strategy.memory_optimize is None: + build_strategy.memory_optimize = False if main._is_mem_optimized else True if build_strategy.enable_inplace is None: build_strategy.enable_inplace = False if main._is_mem_optimized else True scope = scope if scope is not None else executor.global_scope() diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 534411219b500723f3799a08fdf1b7796534376b..289a48aac9cf7ff52295feb722a8116939442425 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -77,6 +77,7 @@ list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op) list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) list(REMOVE_ITEM TEST_OPS test_imperative_resnet) list(REMOVE_ITEM TEST_OPS test_imperative_optimizer) +list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer) foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) @@ -107,6 +108,9 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL) set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450) py_test_modules(test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL) +if(NOT WIN32) +py_test_modules(test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL) +endif() if(NOT APPLE) py_test_modules(test_image_classification_resnet MODULES test_image_classification_resnet SERIAL) if(CMAKE_BUILD_TYPE STREQUAL "Debug") diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py index 84b9198dbf6569b7dbd7bd3c953d5254ece178e8..5298c3c2f6f0113977342ab3e09830027585ada1 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py @@ -15,39 +15,7 @@ from __future__ import print_function import unittest -import numpy as np -from paddle.fluid.tests.unittests.op_test import OpTest - - -class TestNGRAPHAccuracyOp(OpTest): - def setUp(self): - self.op_type = "accuracy" - self.dtype = np.float32 - self.init_dtype() - n = 128 - infer = np.random.random((n, 1)).astype(self.dtype) - indices = np.random.randint(0, 2, (n, 1)) - label = np.random.randint(0, 2, (n, 1)) - self.inputs = {'Out': infer, 'Indices': indices, "Label": label} - num_correct = 0 - for rowid in range(n): - for ele in indices[rowid]: - if ele == label[rowid]: - num_correct += 1 - break - self.outputs = { - 'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype), - 'Correct': np.array([num_correct]).astype("int64"), - 'Total': np.array([n]).astype("int64") - } - self._cpu_only = True - - def init_dtype(self): - pass - - def test_check_output(self): - self.check_output() - +from paddle.fluid.tests.unittests.test_accuracy_op import TestAccuracyOp if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py index 511173af5e5b2a1d1e50d199b55e7d9ace6584f4..34fb73f3cf7e8b3d906ed4e04d151923aa219ab1 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py @@ -17,21 +17,5 @@ from __future__ import print_function import unittest from paddle.fluid.tests.unittests.test_batch_norm_op import TestBatchNormOpTraining, TestBatchNormOpInference - -class TestNGRAPHBatchNormOpTraining(TestBatchNormOpTraining): - def init_kernel_type(self): - super(TestNGRAPHBatchNormOpTraining, self).init_kernel_type() - - -class TestNGRAPHBatchNormOpInference(TestBatchNormOpInference): - def init_kernel_type(self): - super(TestNGRAPHBatchNormOpInference, self).init_kernel_type() - - -class TestNGRAPHBatchNormOpWithReluInference(TestBatchNormOpInference): - def init_kernel_type(self): - super(TestNGRAPHBatchNormOpWithReluInference, self).init_kernel_type() - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py index dbc8557b4e1c96c13c5a189b44bed4b5f1aabf4f..ff2e865b66a5f1166281c267392b0964ca5b3082 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py @@ -17,60 +17,5 @@ from __future__ import print_function import unittest from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp, TestWithPad, TestWithStride, TestWithGroup, TestWith1x1, TestWithInput1x1Filter1x1 - -class TestNGRAPH(TestConv2dOp): - def setUp(self): - super(TestNGRAPH, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPH, self).init_kernel_type() - - -class TestNGRAPHWithPad(TestWithPad): - def setUp(self): - super(TestNGRAPHWithPad, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPHWithPad, self).init_kernel_type() - - -class TestNGRAPHWithStride(TestWithStride): - def setUp(self): - super(TestNGRAPHWithStride, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPHWithStride, self).init_kernel_type() - - -class TestNGRAPHWithGroup(TestWithGroup): - def setUp(self): - super(TestNGRAPHWithGroup, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPHWithGroup, self).init_kernel_type() - - -class TestNGRAPHWith1x1(TestWith1x1): - def setUp(self): - super(TestNGRAPHWith1x1, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPHWith1x1, self).init_kernel_type() - - -class TestNGRAPHWithInput1x1Filter1x1(TestWithInput1x1Filter1x1): - def setUp(self): - super(TestNGRAPHWithInput1x1Filter1x1, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPHWithInput1x1Filter1x1, self).init_kernel_type() - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py new file mode 100644 index 0000000000000000000000000000000000000000..9a185eb97ca6ad4d7987f1a422073d3c8db0d8df --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py @@ -0,0 +1,275 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import paddle.fluid.core as core +from paddle.fluid.tests.unittests.op_test import OpTest, randomize_probability + + +class TestCrossEntropyOp(OpTest): + """Test cross-entropy with discrete one-hot labels. + """ + + def setUp(self): + self.op_type = "cross_entropy" + self.soft_label = False + self.ignore_index = -100 + self.dtype = np.float64 + self.batch_size = 30 + self.class_num = 10 + self._cpu_only = True + + self.init_dtype_type() + self.init_attr_type() + self.init_bs_class_num() + self.init_x() + self.init_label() + self.get_cross_entropy() + + self.inputs = {"X": self.x, "Label": self.label} + self.outputs = {"Y": self.cross_entropy} + self.attrs = { + "soft_label": self.soft_label, + "ignore_index": self.ignore_index + } + + def init_x(self): + self.x = randomize_probability( + self.batch_size, self.class_num, dtype=self.dtype) + + def init_label(self): + self.label = np.random.randint( + 0, self.class_num, (self.batch_size, 1), dtype="int64") + + def get_cross_entropy(self): + self.cross_entropy = np.asmatrix( + [[-np.log(self.x[i][self.label[i][0]])] + for i in range(self.x.shape[0])], + dtype="float64") + + def init_attr_type(self): + pass + + def init_dtype_type(self): + pass + + def init_bs_class_num(self): + pass + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Y", numeric_grad_delta=0.001) + + +class TestCrossEntropyOp2(TestCrossEntropyOp): + """Test cross-entropy with vectorized soft labels. + """ + + def init_label(self): + self.label = np.random.uniform( + 0.1, 1.0, [self.batch_size, self.class_num]).astype(self.dtype) + self.label /= self.label.sum(axis=1, keepdims=True) + + def get_cross_entropy(self): + self.cross_entropy = (-self.label * np.log(self.x)).sum( + axis=1, keepdims=True).astype(self.dtype) + + def init_attr_type(self): + self.soft_label = True + + def init_dtype_type(self): + self.dtype = np.float32 + + def init_bs_class_num(self): + self.batch_size = 5 + self.class_num = 37 + + def test_check_grad(self): + self.check_grad( + ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001) + + +class TestCrossEntropyOp3(TestCrossEntropyOp): + """Test cross-entropy with vectorized one-hot representation of labels. + """ + + def init_label(self): + self.label_index = np.random.randint(0, self.class_num, + (self.batch_size)) + self.label = np.zeros(self.x.shape).astype(self.dtype) + self.label[np.arange(self.batch_size), self.label_index] = 1 + + def get_cross_entropy(self): + self.cross_entropy = np.asmatrix( + [[-np.log(self.x[i][self.label_index[i]])] + for i in range(self.x.shape[0])]).astype(self.dtype) + + def init_attr_type(self): + self.soft_label = True + + def init_dtype_type(self): + self.dtype = np.float32 + + def init_bs_class_num(self): + self.batch_size = 5 + self.class_num = 17 + + def test_check_grad(self): + self.check_grad( + ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001) + + +class TestCrossEntropyOp4(TestCrossEntropyOp): + """Test high rank tensor cross-entropy with discrete one-hot labels. + """ + + def init_x(self): + self.shape = [10, 2, 4] + self.ins_num = np.prod(np.array(self.shape)) + self.X_2d = randomize_probability(self.ins_num, + self.class_num).astype(self.dtype) + self.x = self.X_2d.reshape(self.shape + [self.class_num]) + + def init_label(self): + self.label_2d = np.random.randint( + 0, self.class_num, (self.ins_num, 1), dtype="int64") + self.label = self.label_2d.reshape(self.shape + [1]) + + def get_cross_entropy(self): + cross_entropy_2d = np.asmatrix( + [[-np.log(self.X_2d[i][self.label_2d[i][0]])] + for i in range(self.X_2d.shape[0])]).astype(self.dtype) + self.cross_entropy = np.array(cross_entropy_2d).reshape(self.shape + + [1]) + + def init_attr_type(self): + self.soft_label = False + + def init_dtype_type(self): + self.dtype = np.float64 + + def init_bs_class_num(self): + self.class_num = 10 + + +class TestCrossEntropyOp5(TestCrossEntropyOp): + """Test high rank tensor cross-entropy with vectorized soft labels. + """ + + def init_x(self): + self.shape = [4, 3] + self.ins_num = np.prod(np.array(self.shape)) + self.X_2d = randomize_probability(self.ins_num, + self.class_num).astype(self.dtype) + self.x = self.X_2d.reshape(self.shape + [self.class_num]) + + def init_label(self): + self.label_2d = np.random.uniform( + 0.1, 1.0, [self.ins_num, self.class_num]).astype(self.dtype) + self.label_2d /= self.label_2d.sum(axis=1, keepdims=True) + self.label = self.label_2d.reshape(self.shape + [self.class_num]) + + def get_cross_entropy(self): + cross_entropy_2d = (-self.label_2d * np.log(self.X_2d)).sum( + axis=1, keepdims=True).astype(self.dtype) + self.cross_entropy = np.array(cross_entropy_2d).reshape(self.shape + + [1]) + + def init_attr_type(self): + self.soft_label = True + + def init_dtype_type(self): + self.dtype = np.float32 + + def init_bs_class_num(self): + self.class_num = 37 + + def test_check_grad(self): + self.check_grad( + ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001) + + +class TestCrossEntropyOp6(TestCrossEntropyOp): + """Test high rank tensor cross-entropy with vectorized one-hot representation of labels. + """ + + def init_x(self): + self.shape = [4, 3, 2] + self.ins_num = np.prod(np.array(self.shape)) + self.X_2d = randomize_probability(self.ins_num, + self.class_num).astype(self.dtype) + self.x = self.X_2d.reshape(self.shape + [self.class_num]) + + def init_label(self): + self.label_index_2d = np.random.randint( + 0, self.class_num, (self.ins_num), dtype="int64") + label_2d = np.zeros(self.X_2d.shape) + label_2d[np.arange(self.ins_num), self.label_index_2d] = 1 + self.label = label_2d.reshape(self.shape + [self.class_num]).astype( + self.dtype) + + def get_cross_entropy(self): + cross_entropy_2d = np.asmatrix( + [[-np.log(self.X_2d[i][self.label_index_2d[i]])] + for i in range(self.X_2d.shape[0])]) + self.cross_entropy = np.array(cross_entropy_2d).reshape( + self.shape + [1]).astype(self.dtype) + + def init_attr_type(self): + self.soft_label = True + + def init_dtype_type(self): + self.dtype = np.float32 + + def init_bs_class_num(self): + self.class_num = 17 + + def test_check_grad(self): + self.check_grad( + ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001) + + +class TestCrossEntropyOp7(TestCrossEntropyOp): + """Test cross-entropy with ignore index. + """ + + def init_label(self): + self.label = np.random.randint( + 0, self.class_num, (self.batch_size, 1), dtype="int64") + + def get_cross_entropy(self): + self.cross_entropy = np.asmatrix( + [[-np.log(self.x[i][self.label[i][0]])] + if self.label[i][0] != self.ignore_index else [0] + for i in range(self.x.shape[0])]).astype(self.dtype) + + def init_attr_type(self): + self.soft_label = False + self.ignore_index = 3 + + def init_dtype_type(self): + self.dtype = np.float64 + + def init_bs_class_num(self): + self.batch_size = 30 + self.class_num = 10 + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py index 67f749bfeeb1bb47a8c5bb3486ac3292c8af8164..3fb9af3a542d5e6b0de7d8d839408759abdaedcb 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py @@ -13,18 +13,9 @@ # limitations under the License. from __future__ import print_function -import unittest -from paddle.fluid.tests.unittests.test_elementwise_add_op import TestElementwiseAddOp - - -class TestNGRAPHElementwiseAddOp(TestElementwiseAddOp): - def setUp(self): - super(TestNGRAPHElementwiseAddOp, self).setUp() - self._cpu_only = True - - def init_input_output(self): - super(TestNGRAPHElementwiseAddOp, self).init_input_output() +import unittest +from paddle.fluid.tests.unittests.test_elementwise_add_op import TestElementwiseAddOp, TestElementwiseAddOp_broadcast_0 if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py index 835376ffe78f9119a9be6c379998e3a3b50aab43..2b10b8f7a3ac0f978c13bd86824b939e69c5336a 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py @@ -13,24 +13,34 @@ # limitations under the License. from __future__ import print_function + import unittest +import numpy as np from paddle.fluid.tests.unittests.test_fill_constant_op import TestFillConstantOp1, TestFillConstantOp2, TestFillConstantOpWithSelectedRows -class TestNGRAPHFillConstantOp1(TestFillConstantOp1): +class TestNGRAPHFillConstantFP64(TestFillConstantOp1): def setUp(self): - super(TestNGRAPHFillConstantOp1, self).setUp() + super(TestNGRAPHFillConstantFP64, self).setUp() + + self.attrs = {'shape': [123, 92], 'value': 3.8, 'dtype': 6} + self.outputs = {'Out': np.full((123, 92), 3.8)} -class TestNGRAPHFillConstantOp2(TestFillConstantOp2): +class TestNGRAPHFillConstantINT32(TestFillConstantOp2): def setUp(self): - super(TestNGRAPHFillConstantOp2, self).setUp() + super(TestNGRAPHFillConstantINT32, self).setUp() + self.attrs = {'shape': [123, 92], 'dtype': 2} + self.outputs = {'Out': np.full((123, 92), 0)} -class TestNGRAPHFillConstantOpWithSelectedRows( - TestFillConstantOpWithSelectedRows): + +class TestNGRAPHFillConstantINT64(TestFillConstantOp2): def setUp(self): - super(TestFillConstantOpWithSelectedRows, self).setUp() + super(TestNGRAPHFillConstantINT64, self).setUp() + + self.attrs = {'shape': [123, 92], 'dtype': 3} + self.outputs = {'Out': np.full((123, 92), 0)} if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py index 11881ac6e5292ce2beea1b353c6ca857ada28839..b4894734cbcc11cf5eec7401297dc35545aa7268 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py @@ -16,12 +16,5 @@ from __future__ import print_function import unittest from paddle.fluid.tests.unittests.test_mean_op import TestMeanOp - -class TestNGRAPHMeanOp(TestMeanOp): - def setUp(self): - super(TestNGRAPHMeanOp, self).setUp() - self._cpu_only = True - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py index a916c8d450f4a218c85f39c31737b2efa0ef926d..549d03f6e92dc7e88ec8618e5f97287bb68ed0d9 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py @@ -15,39 +15,7 @@ from __future__ import print_function import unittest -import numpy as np -from paddle.fluid.tests.unittests.op_test import OpTest - - -class TestNGRAPHMulOp(OpTest): - def setUp(self): - self.op_type = "mul" - self.dtype = np.float32 - self.init_dtype_type() - self.inputs = { - 'X': np.random.random((2, 4)).astype(self.dtype), - 'Y': np.random.random((4, 4)).astype(self.dtype) - } - self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} - self._cpu_only = True - - def init_dtype_type(self): - pass - - def test_check_output(self): - self.check_output() - - def test_check_grad_normal(self): - self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5) - - def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) - - def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) - +from paddle.fluid.tests.unittests.test_mul_op import TestMulOp, TestMulOp2 if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py index 96a2b72d8add9cc765a8536932b72426c8489025..ff82e9fa1d3d343aa7faf56a0bd27d2c9edc1ea4 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py @@ -14,61 +14,25 @@ from __future__ import print_function -from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5 - - -class TestNGRAPHPool2D_Op(TestPool2D_Op): - def setUp(self): - super(TestNGRAPHPool2D_Op, self).setUp() - self._cpu_only = True - - def init_test_case(self): - super(TestNGRAPHPool2D_Op, self).init_test_case() - - -class TestNGRAPHCase1(TestCase1): - def setUp(self): - super(TestNGRAPHCase1, self).setUp() - self._cpu_only = True - - def init_test_case(self): - super(TestNGRAPHCase1, self).init_test_case() +import unittest - -class TestNGRAPHCase2(TestCase2): - def setUp(self): - super(TestNGRAPHCase2, self).setUp() - self._cpu_only = True - - def init_test_case(self): - super(TestNGRAPHCase2, self).init_test_case() - - -class TestNGRAPHCase3(TestCase3): - def setUp(self): - super(TestNGRAPHCase3, self).setUp() - self._cpu_only = True - - def init_pool_type(self): - super(TestNGRAPHCase3, self).init_pool_type() +from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5 -class TestNGRAPHCase4(TestCase4): +class TestNGRAPHCeilMode(TestCase1): def setUp(self): - super(TestNGRAPHCase4, self).setUp() - self._cpu_only = True + super(TestNGRAPHCeilMode, self).setUp() - def init_pool_type(self): - super(TestNGRAPHCase4, self).init_pool_type() + def init_ceil_mode(self): + self.ceil_mode = True -class TestNGRAPHCase5(TestCase5): +class TestNGRAPHAdaptive(TestCase1): def setUp(self): - super(TestNGRAPHCase5, self).setUp() - self._cpu_only = True + super(TestNGRAPHAdaptive, self).setUp() - def init_pool_type(self): - super(TestNGRAPHCase5, self).init_pool_type() + def init_adaptive(self): + self.adaptive = True if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py index 4da5ca4583c65d69ead8d3e9886605a7ad104cc0..8beb44f55e487eef5f1957e9284d4a711c9770aa 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py @@ -15,24 +15,5 @@ from __future__ import print_function import unittest from paddle.fluid.tests.unittests.test_scale_op import TestScaleOp, TestScaleOpSelectedRows - -class TestNGRAPHScaleOp(TestScaleOp): - def setUp(self): - super(TestNGRAPHScaleOp, self).setUp() - self._cpu_only = True - - def init_dtype_type(self): - pass - - -class TestNGRAPHScaleOpSelectedRows(TestScaleOpSelectedRows): - def setUp(self): - super(TestNGRAPHScaleOpSelectedRows, self).setUp() - self._cpu_only = True - - def init_dtype_type(self): - pass - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py index 81894c6e3872e4617085c6bb4b0219a49c9986fd..0cb08842df0797952c47a63ba2bbb8614c0e8a22 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py @@ -16,11 +16,5 @@ from __future__ import print_function import unittest from paddle.fluid.tests.unittests.test_softmax_op import TestSoftmaxOp - -class TestSoftmaxNGRAPHOp(TestSoftmaxOp): - def setUp(self): - super(TestSoftmaxNGRAPHOp, self).setUp() - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py index fa68df1adf2cfb31c63b70098a6fedc2ab3913aa..d2319c4d921fccb950b1a3059fdecd3b3b044182 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py @@ -16,30 +16,5 @@ from __future__ import print_function import unittest from paddle.fluid.tests.unittests.test_top_k_op import TestTopkOp, TestTopkOp3d, TestTopkOp2, TestTopkOp3, TestTopkOp4 - -class TestNGRAPHTopkOp(TestTopkOp): - def setUp(self): - super(TestNGRAPHTopkOp, self).setUp() - self._cpu_only = True - - -class TestNGRAPHTopkOp2(TestTopkOp2): - def setUp(self): - super(TestNGRAPHTopkOp2, self).setUp() - self._cpu_only = True - - -class TestNGRAPHTopkOp3(TestTopkOp3): - def setUp(self): - super(TestNGRAPHTopkOp3, self).setUp() - self._cpu_only = True - - -class TestNGRAPHTopkOp4(TestTopkOp4): - def setUp(self): - super(TestNGRAPHTopkOp4, self).setUp() - self._cpu_only = True - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index 0fe836683b029698b670bbb9f9bb258c2f3b68a0..823445724302dbde47bc36122c62ef44a7e2394f 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -14,6 +14,7 @@ from __future__ import print_function +import os import unittest import numpy as np import random @@ -374,6 +375,9 @@ class OpTest(unittest.TestCase): return [] places = [fluid.CPUPlace()] cpu_only = self._cpu_only if hasattr(self, '_cpu_only') else False + use_ngraph = bool(os.getenv("FLAGS_use_ngraph", False)) + if use_ngraph: + cpu_only = True if core.is_compiled_with_cuda() and core.op_support_gpu(self.op_type)\ and not cpu_only: places.append(core.CUDAPlace(0)) diff --git a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py index c429c8af7d37cb4e209edc41f704868afe054829..a94487e67dc90d4df935867f841bc567c37c8aa2 100644 --- a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py +++ b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py @@ -79,7 +79,7 @@ class TestParallelExecutorBase(unittest.TestCase): if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce build_strategy.fuse_elewise_add_act_ops = fuse_elewise_add_act_ops build_strategy.fuse_relu_depthwise_conv = fuse_relu_depthwise_conv - build_strategy.memory_optimize = use_ir_memory_optimize + build_strategy.memory_optimize = False if memory_opt else use_ir_memory_optimize # python memory optimization is conflict with inplace pass. # Use ir graph memory optimization after inplace pass is the correct way. build_strategy.enable_inplace = False if memory_opt else enable_inplace diff --git a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py index 03471a4432f2b6bf6220e79e99aa506628b1535b..c1fb53ecf52d953fa470998c120930b2bec6325b 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py @@ -121,6 +121,8 @@ class TestMNIST(TestParallelExecutorBase): regularization=fluid.regularizer.L2Decay(1e-6)) return optimizer + # NOTE(dzh): + # need to make it compatible with elewise fuse act not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( model, feed_dict={"image": img, @@ -128,6 +130,7 @@ class TestMNIST(TestParallelExecutorBase): use_cuda=use_cuda, fuse_elewise_add_act_ops=False, memory_opt=False, + use_ir_memory_optimize=False, optimizer=_optimizer) fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( model, @@ -136,6 +139,7 @@ class TestMNIST(TestParallelExecutorBase): use_cuda=use_cuda, fuse_elewise_add_act_ops=True, memory_opt=False, + use_ir_memory_optimize=False, optimizer=_optimizer) for loss in zip(not_fuse_op_first_loss, fuse_op_first_loss): diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..c0f480e34dcac3351ba3008ad632a29943afdb81 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py @@ -0,0 +1,48 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest +import paddle.fluid as fluid +import paddle.fluid.core as core + +os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" +os.environ[ + 'RECORDIO_FILENAME'] = '/tmp/ir_memory_optimize_transformer.wmt16.recordio' + +from test_parallel_executor_transformer import TestTransformer +from test_parallel_executor_transformer import transformer + + +# NOTE(dzhwinter): test diferent strategy colisions. +# open the eager delete tensor strategy by default. +class TestTransformerWithIR(TestTransformer): + def test_main(self): + if core.is_compiled_with_cuda(): + # check python transpiler + self.check_network_convergence( + transformer, + use_cuda=True, + memory_opt=True, + use_ir_memory_optimize=False) + # check IR memory optimize + self.check_network_convergence( + transformer, + use_cuda=True, + memory_opt=False, + use_ir_memory_optimize=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/requirements.txt b/python/requirements.txt index 5a70f1aa3ffc0ab6d4d148eb5bc26981784f2d32..6cbda1db545957b8e7b943366f9b954ff1dfdc77 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,6 +1,6 @@ requests==2.9.2 numpy>=1.12 -protobuf==3.1 +protobuf>=3.6 recordio>=0.1.0 matplotlib==2.2.3 # TODO: let python3 paddlepaddle package use latest matplotlib rarfile diff --git a/tools/manylinux1/Dockerfile.x64 b/tools/manylinux1/Dockerfile.x64 index 48fd145e5fe6735fca3096752f801b1ec1cb39f0..c2fd743f62f536ab7443ca215d100478021d8f7c 100644 --- a/tools/manylinux1/Dockerfile.x64 +++ b/tools/manylinux1/Dockerfile.x64 @@ -31,10 +31,10 @@ RUN wget --no-check-certificate -qO- https://storage.googleapis.com/golang/go1.8 ENV GOROOT=/usr/local/go GOPATH=/root/gopath ENV PATH=${GOROOT}/bin:${GOPATH}/bin:${PATH} -# protobuf 3.1.0 -RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf/releases/download/v3.1.0/protobuf-cpp-3.1.0.tar.gz && \ - tar xzf protobuf-cpp-3.1.0.tar.gz && \ - cd protobuf-3.1.0 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.1.0.tar.gz +# protobuf 3.6.1 +RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf/releases/download/v3.6.1/protobuf-cpp-3.6.1.tar.gz && \ + tar xzf protobuf-cpp-3.6.1.tar.gz && \ + cd protobuf-3.6.1 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.6.1.tar.gz RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root/requirements.txt diff --git a/tools/manylinux1/build_scripts/build.sh b/tools/manylinux1/build_scripts/build.sh index 6c551eceb4543bf33229b9e5b5124522f3ee134c..5b676c024317596832870232dccb33895cae0c3e 100644 --- a/tools/manylinux1/build_scripts/build.sh +++ b/tools/manylinux1/build_scripts/build.sh @@ -17,7 +17,7 @@ OPENSSL_ROOT=openssl-1.1.0i OPENSSL_HASH=ebbfc844a8c8cc0ea5dc10b86c9ce97f401837f3fa08c17b2cdadc118253cf99 EPEL_RPM_HASH=e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc -PATCHELF_HASH=d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb +PATCHELF_HASH=f2aa40a6148cb3b0ca807a1bf836b081793e55ec9e5540a5356d800132be7e0a CURL_ROOT=curl-7.49.1 CURL_HASH=eb63cec4bef692eab9db459033f409533e6d10e20942f4b060b32819e81885f1 AUTOCONF_ROOT=autoconf-2.69 @@ -107,11 +107,11 @@ curl-config --features rm -rf /usr/local/ssl # Install patchelf (latest with unreleased bug fixes) -curl -sLO http://nipy.bic.berkeley.edu/manylinux/patchelf-0.9njs2.tar.gz -check_sha256sum patchelf-0.9njs2.tar.gz $PATCHELF_HASH -tar -xzf patchelf-0.9njs2.tar.gz -(cd patchelf-0.9njs2 && ./configure && make && make install) -rm -rf patchelf-0.9njs2.tar.gz patchelf-0.9njs2 +curl -sLO https://nixos.org/releases/patchelf/patchelf-0.9/patchelf-0.9.tar.gz +check_sha256sum patchelf-0.9.tar.gz $PATCHELF_HASH +tar -xzf patchelf-0.9.tar.gz +(cd patchelf-0.9 && ./configure && make && make install) +rm -rf patchelf-0.9.tar.gz patchelf-0.9 # Install latest pypi release of auditwheel LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib" $PY35_BIN/pip install auditwheel