From 7c8c9dc9bf441ee3360ec416fd71dbf5921ba391 Mon Sep 17 00:00:00 2001
From: peizhilin
Date: Thu, 22 Nov 2018 19:15:47 +0800
Subject: [PATCH] fix unit test cases

---
 cmake/generic.cmake                                | 11 ++++++--
 .../framework/details/all_reduce_op_handle.cc      |  4 +--
 .../framework/details/all_reduce_op_handle.h       |  6 ++---
 .../framework/details/broadcast_op_handle.cc       |  2 +-
 .../framework/details/broadcast_op_handle.h        |  6 ++---
 .../details/broadcast_op_handle_test.h             | 12 ++++-----
 .../fluid/framework/details/build_strategy.cc      |  4 +--
 .../fluid/framework/details/build_strategy.h       |  4 +--
 .../details/data_balance_op_handle.cc              |  2 +-
 .../details/data_balance_op_handle.h               |  4 +--
 .../details/fused_broadcast_op_handle.h            |  4 +--
 .../details/fused_broadcast_op_handle_test.cc      |  4 +--
 .../details/multi_devices_graph_pass.cc            | 16 +++++------
 .../details/multi_devices_graph_pass.h             |  2 +-
 .../framework/details/reduce_op_handle.cc          |  2 +-
 .../framework/details/reduce_op_handle.h           |  4 +--
 .../details/reduce_op_handle_test.cc               | 12 ++++-----
 .../fluid/framework/ir/is_test_pass_tester.cc      |  5 +++-
 .../inference/analysis/analyzer_tester.cc          |  3 ++-
 paddle/fluid/inference/api/helper.h                |  5 +---
 .../inference/tests/api/anakin_rnn1_tester.cc      |  1 -
 .../tests/book/test_inference_nlp.cc               |  1 -
 paddle/fluid/inference/tests/test_helper.h         |  1 +
 paddle/fluid/operators/beam_search_op_test.cc      | 16 +++++------
 .../operators/distributed/grpc_client.cc           |  2 +-
 .../fluid/operators/distributed/grpc_serde.cc      |  2 +-
 .../fluid/operators/distributed/grpc_serde.h       |  3 ++-
 .../operators/distributed/sendrecvop_utils.cc      |  2 +-
 .../operators/distributed/sendrecvop_utils.h       |  2 +-
 paddle/fluid/operators/math/cpu_vec_test.cc        |  2 +-
 paddle/fluid/operators/math/im2col_test.cc         |  2 +-
 .../fluid/operators/math/jit_kernel_test.cc        |  2 +-
 paddle/fluid/platform/cudnn_helper.h               |  2 +-
 paddle/fluid/platform/dynload/cudnn.h              | 14 +++++-----
 paddle/fluid/platform/gpu_info.cc                  |  5 +++-
 .../fluid/platform/stream_callback_manager.h       |  2 +-
 paddle/legacy/cuda/include/hl_warpctc_wrap.h       |  3 ++-
 paddle/legacy/cuda/src/hl_cuda_device.cc           |  4 +++
 paddle/legacy/utils/ThreadLocal.h                  |  4 ++-
 paddle/legacy/utils/Util.h                         | 27 +++++++++++++++++++
 paddle/testing/CMakeLists.txt                      |  6 +++--
 python/paddle/fluid/metrics.py                     |  4 +--
 .../fluid/tests/unittests/CMakeLists.txt           |  8 +++---
 43 files changed, 138 insertions(+), 89 deletions(-)

diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 111627a932..cabef3f713 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -349,10 +349,17 @@ function(cc_test TARGET_NAME)
     set(oneValueArgs "")
     set(multiValueArgs SRCS DEPS ARGS)
     cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    if(WIN32)
+      list(APPEND win32_deps shlwapi)
+      if("${cc_test_DEPS};" MATCHES "python;")
+        list(REMOVE_ITEM cc_test_DEPS python)
+        list(APPEND win32_deps ${PYTHON_LIBRARIES})
+      endif()
+    endif(WIN32)
     add_executable(${TARGET_NAME} ${cc_test_SRCS})
     target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
     if(WIN32)
-      target_link_libraries(${TARGET_NAME} shlwapi)
+      target_link_libraries(${TARGET_NAME} ${win32_deps})
     endif(WIN32)
     add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
     add_test(NAME ${TARGET_NAME}
@@ -679,7 +686,7 @@ function(py_test TARGET_NAME)
     set(multiValueArgs SRCS DEPS ARGS ENVS)
     cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     add_test(NAME ${TARGET_NAME}
-             COMMAND env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
+             COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
              FLAGS_cpu_deterministic=true
              PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
              ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc
index b869015676..a003995ae3 100644
--- a/paddle/fluid/framework/details/all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc
@@ -23,7 +23,7 @@ namespace paddle {
 namespace framework {
 namespace details {
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 AllReduceOpHandle::AllReduceOpHandle(ir::Node *node,
                                      const std::vector<Scope *> &local_scopes,
                                      const std::vector<platform::Place> &places,
                                      const platform::NCCLContextMap *ctxs)
@@ -74,7 +74,7 @@ void AllReduceOpHandle::RunImpl() {
   }
 
   if (platform::is_gpu_place(lod_tensors[0]->place())) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     PADDLE_ENFORCE(nccl_ctxs_, "nccl_ctxs should not be nullptr.");
     int dtype = -1;
     size_t numel = 0;
diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.h b/paddle/fluid/framework/details/all_reduce_op_handle.h
index f6ef3a1367..b449796fca 100644
--- a/paddle/fluid/framework/details/all_reduce_op_handle.h
+++ b/paddle/fluid/framework/details/all_reduce_op_handle.h
@@ -20,7 +20,7 @@
 #include "paddle/fluid/framework/details/op_handle_base.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
 
@@ -29,7 +29,7 @@ namespace framework {
 namespace details {
 
 struct AllReduceOpHandle : public OpHandleBase {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   AllReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                     const std::vector<platform::Place> &places,
                     const platform::NCCLContextMap *ctxs);
@@ -49,7 +49,7 @@ struct AllReduceOpHandle : public OpHandleBase {
 
  private:
   std::vector<Scope *> local_scopes_;
   std::vector<platform::Place> places_;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   const platform::NCCLContextMap *nccl_ctxs_;
 #endif
 };
diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc
index 8e5e542765..d98df3bbad 100644
--- a/paddle/fluid/framework/details/broadcast_op_handle.cc
+++ b/paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -82,7 +82,7 @@ void BroadcastOpHandle::BroadcastOneVar(
         });
     }
   } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     VarHandle *out_handle = nullptr;
     int root_id = boost::get<platform::CUDAPlace>(in_tensor.place()).device;
     std::vector<std::function<void()>> broadcast_calls;
diff --git a/paddle/fluid/framework/details/broadcast_op_handle.h b/paddle/fluid/framework/details/broadcast_op_handle.h
index 72180fac86..0c75e05f86 100644
--- a/paddle/fluid/framework/details/broadcast_op_handle.h
+++ b/paddle/fluid/framework/details/broadcast_op_handle.h
@@ -24,7 +24,7 @@
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/platform/device_context.h"
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
 
@@ -34,7 +34,7 @@ namespace details {
 
 struct BroadcastOpHandle : public OpHandleBase {
  public:
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   BroadcastOpHandle(ir::Node *node,
                    const std::vector<Scope *> &local_scopes,
                    const std::vector<platform::Place> &places,
                    const platform::NCCLContextMap *nccl_ctxs)
@@ -68,7 +68,7 @@ struct BroadcastOpHandle : public OpHandleBase {
   std::vector<Scope *> local_scopes_;
   std::vector<platform::Place> places_;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   const platform::NCCLContextMap *nccl_ctxs_;
 #endif
diff --git a/paddle/fluid/framework/details/broadcast_op_handle_test.h b/paddle/fluid/framework/details/broadcast_op_handle_test.h
index 4305eb6573..df3b3cc9ca 100644
--- a/paddle/fluid/framework/details/broadcast_op_handle_test.h
+++ b/paddle/fluid/framework/details/broadcast_op_handle_test.h
@@ -42,7 +42,7 @@ struct TestBroadcastOpHandle {
   std::vector<std::unique_ptr<ir::Node>> nodes_;
   std::vector<p::Place> place_list_;
   bool use_gpu_;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
 #endif
 
@@ -50,7 +50,7 @@ struct TestBroadcastOpHandle {
     for (size_t j = 0; j < ctxs_.size(); ++j) {
       ctxs_[j]->Wait();
     }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     if (nccl_ctxs_) {
       nccl_ctxs_->WaitAll();
     }
@@ -60,7 +60,7 @@ struct TestBroadcastOpHandle {
   void InitCtxOnGpu(bool use_gpu) {
     use_gpu_ = use_gpu;
     if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       int count = p::GetCUDADeviceCount();
       if (count <= 1) {
         LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
@@ -84,7 +84,7 @@ struct TestBroadcastOpHandle {
         place_list_.push_back(p);
         ctxs_.emplace_back(new p::CPUDeviceContext(p));
       }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       nccl_ctxs_.reset(nullptr);
 #endif
     }
@@ -106,14 +106,14 @@ struct TestBroadcastOpHandle {
     nodes_.emplace_back(
         ir::CreateNodeForTest("node0", ir::Node::Type::kOperation));
     if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
                                          place_list_, nccl_ctxs_.get());
 #else
       PADDLE_THROW("CUDA is not support.");
 #endif
     } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
                                          place_list_, nccl_ctxs_.get());
 #else
diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc
index 37202f8695..70baced0ad 100644
--- a/paddle/fluid/framework/details/build_strategy.cc
+++ b/paddle/fluid/framework/details/build_strategy.cc
@@ -96,7 +96,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
     const std::string &loss_var_name,
     const std::unordered_set<std::string> &param_names,
     const std::vector<Scope *> &local_scopes,
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const {
 #else
     const bool use_cuda) const {
@@ -118,7 +118,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
       pass->Erase("local_scopes");
       pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
                                                     &local_scopes);
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       platform::NCCLContextMap *nctx = use_cuda ? nccl_ctxs : nullptr;
       pass->Erase("nccl_ctxs");
       pass->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", nctx);
diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h
index fc2641dbd4..3236c35efd 100644
--- a/paddle/fluid/framework/details/build_strategy.h
+++ b/paddle/fluid/framework/details/build_strategy.h
@@ -23,7 +23,7 @@
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/enforce.h"
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
 
@@ -98,7 +98,7 @@ struct BuildStrategy {
       const std::string &loss_var_name,
       const std::unordered_set<std::string> &param_names,
       const std::vector<Scope *> &local_scopes,
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const;
 #else
       const bool use_cuda) const;
diff --git a/paddle/fluid/framework/details/data_balance_op_handle.cc b/paddle/fluid/framework/details/data_balance_op_handle.cc
index 0b772f9b63..cc562c7b10 100644
--- a/paddle/fluid/framework/details/data_balance_op_handle.cc
+++ b/paddle/fluid/framework/details/data_balance_op_handle.cc
@@ -20,7 +20,7 @@ namespace paddle {
 namespace framework {
 namespace details {
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 DataBalanceOpHandle::DataBalanceOpHandle(
     ir::Node *node, const std::vector<Scope *> &local_scopes,
     const std::vector<platform::Place> &places,
diff --git a/paddle/fluid/framework/details/data_balance_op_handle.h b/paddle/fluid/framework/details/data_balance_op_handle.h
index 0462fb6ec7..2db18a1a72 100644
--- a/paddle/fluid/framework/details/data_balance_op_handle.h
+++ b/paddle/fluid/framework/details/data_balance_op_handle.h
@@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/details/op_handle_base.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
 
@@ -29,7 +29,7 @@ namespace details {
 
 struct DataBalanceOpHandle : public OpHandleBase {
  public:
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   DataBalanceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                       const std::vector<platform::Place> &places,
                       const platform::NCCLContextMap *ctxs);
diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle.h b/paddle/fluid/framework/details/fused_broadcast_op_handle.h
index e37259526a..e43d545c9c 100644
--- a/paddle/fluid/framework/details/fused_broadcast_op_handle.h
+++ b/paddle/fluid/framework/details/fused_broadcast_op_handle.h
@@ -25,7 +25,7 @@
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/platform/device_context.h"
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
 
@@ -35,7 +35,7 @@ namespace details {
 
 struct FusedBroadcastOpHandle : public BroadcastOpHandle {
  public:
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   FusedBroadcastOpHandle(ir::Node *node,
                          const std::vector<Scope *> local_scopes,
                          const std::vector<platform::Place> &places,
diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
index 541993c743..be0d941c4f 100644
--- a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
+++ b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
@@ -44,14 +44,14 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
     nodes_.emplace_back(
         ir::CreateNodeForTest("fused_broadcast", ir::Node::Type::kOperation));
     if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_ = new FusedBroadcastOpHandle(
           nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
 #else
       PADDLE_THROW("CUDA is not supported.");
 #endif
     } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_ = new FusedBroadcastOpHandle(
           nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
 #else
diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
index 8c98b78130..26666212ae 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
@@ -142,7 +142,7 @@ void MultiDevSSAGraphBuilder::Init() const {
   places_ = Get<const std::vector<platform::Place>>(kPlaces);
   local_scopes_ = Get<const std::vector<Scope *>>(kLocalScopes);
   strategy_ = Get<const BuildStrategy>(kStrategy);
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   nccl_ctxs_ = &Get<platform::NCCLContextMap>("nccl_ctxs");
 #endif
 
@@ -431,7 +431,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
     }
   }
   bool use_gpu = false;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   use_gpu = nccl_ctxs_ != nullptr;
 #endif
 
@@ -478,7 +478,7 @@ bool MultiDevSSAGraphBuilder::IsSparseGradient(const std::string &og) const {
 
 void MultiDevSSAGraphBuilder::SetCommunicationContext(
     OpHandleBase *op_handle, const platform::Place &p) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   if (nccl_ctxs_ == nullptr) {
     op_handle->SetDeviceContext(p,
                                 platform::DeviceContextPool::Instance().Get(p));
@@ -492,7 +492,7 @@ void MultiDevSSAGraphBuilder::SetCommunicationContext(
 void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
                                                 const std::string &p_name,
                                                 size_t src_dev_id) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   auto *op_handle = new BroadcastOpHandle(
       result->CreateEmptyNode("broadcast", ir::Node::Type::kOperation),
       local_scopes_, places_, nccl_ctxs_);
@@ -522,7 +522,7 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
 void MultiDevSSAGraphBuilder::CreateFusedBroadcastOp(
     ir::Graph *result,
     const std::vector<std::unordered_set<std::string>> &bcast_varnames) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   auto *op_handle = new FusedBroadcastOpHandle(
       result->CreateEmptyNode("fused_broadcast", ir::Node::Type::kOperation),
       local_scopes_, places_, nccl_ctxs_);
@@ -568,7 +568,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOp(ir::Graph *result,
 
 void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
                                                 const std::string &og) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   result->Get<GraphOps>(kGraphOps).emplace_back(new AllReduceOpHandle(
       result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation),
       local_scopes_, places_, nccl_ctxs_));
@@ -597,7 +597,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
 
 void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
     ir::Graph *result, const std::vector<std::string> &datas) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   result->Get<GraphOps>(kGraphOps).emplace_back(new DataBalanceOpHandle(
       result->CreateEmptyNode("data_balance", ir::Node::Type::kOperation),
       local_scopes_, places_, nccl_ctxs_));
@@ -694,7 +694,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOps(ir::Graph *result,
 VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result,
                                                    const std::string &og,
                                                    int dst_dev_id) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   result->Get<GraphOps>(kGraphOps).emplace_back(new ReduceOpHandle(
       result->CreateEmptyNode("reduce", ir::Node::Type::kOperation),
       local_scopes_, places_, nccl_ctxs_));
diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.h b/paddle/fluid/framework/details/multi_devices_graph_pass.h
index f3ec2d2941..8e462aec7d 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.h
@@ -40,7 +40,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
                             size_t device_id) const;
   void Init() const;
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   mutable platform::NCCLContextMap *nccl_ctxs_;
 #endif
 
diff --git a/paddle/fluid/framework/details/reduce_op_handle.cc b/paddle/fluid/framework/details/reduce_op_handle.cc
index 4503123eac..c9f1107aea 100644
--- a/paddle/fluid/framework/details/reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/reduce_op_handle.cc
@@ -125,7 +125,7 @@ void ReduceOpHandle::RunImpl() {
       }
     });
   } else if (paddle::platform::is_gpu_place(lod_tensors[0]->place())) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     auto pre_in = pre_in_var->Get<framework::SelectedRows>();
     VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
     VariableVisitor::GetMutableTensor(out_var).mutable_data(
diff --git a/paddle/fluid/framework/details/reduce_op_handle.h b/paddle/fluid/framework/details/reduce_op_handle.h
index 999828ae45..846839029c 100644
--- a/paddle/fluid/framework/details/reduce_op_handle.h
+++ b/paddle/fluid/framework/details/reduce_op_handle.h
@@ -23,7 +23,7 @@
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/platform/device_context.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
 
@@ -35,7 +35,7 @@ struct ReduceOpHandle : public OpHandleBase {
   std::vector<Scope *> local_scopes_;
   std::vector<platform::Place> places_;
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   const platform::NCCLContextMap *nccl_ctxs_;
   ReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                  const std::vector<platform::Place> &places,
diff --git a/paddle/fluid/framework/details/reduce_op_handle_test.cc b/paddle/fluid/framework/details/reduce_op_handle_test.cc
index 72299c0bfa..6cee4770e6 100644
--- a/paddle/fluid/framework/details/reduce_op_handle_test.cc
+++ b/paddle/fluid/framework/details/reduce_op_handle_test.cc
@@ -35,7 +35,7 @@ struct TestReduceOpHandle {
   std::vector<p::Place> gpu_list_;
   std::vector<std::unique_ptr<p::DeviceContext>> ctxs_;
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
 #endif
 
@@ -43,7 +43,7 @@ struct TestReduceOpHandle {
     for (size_t j = 0; j < ctxs_.size(); ++j) {
       ctxs_[j]->Wait();
     }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     if (nccl_ctxs_) {
       nccl_ctxs_->WaitAll();
     }
@@ -53,7 +53,7 @@ struct TestReduceOpHandle {
   void InitCtxOnGpu(bool use_gpu) {
     use_gpu_ = use_gpu;
     if (use_gpu) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       int count = p::GetCUDADeviceCount();
      if (count <= 1) {
         LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
@@ -77,7 +77,7 @@ struct TestReduceOpHandle {
         gpu_list_.push_back(p);
         ctxs_.emplace_back(new p::CPUDeviceContext(p));
       }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       nccl_ctxs_.reset(nullptr);
 #endif
     }
@@ -99,14 +99,14 @@ struct TestReduceOpHandle {
     nodes.emplace_back(new ir::Node("node"));
 
     if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
                                           gpu_list_, nccl_ctxs_.get()));
 #else
       PADDLE_THROW("CUDA is not support.");
 #endif
     } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
                                           gpu_list_, nccl_ctxs_.get()));
 #else
diff --git a/paddle/fluid/framework/ir/is_test_pass_tester.cc b/paddle/fluid/framework/ir/is_test_pass_tester.cc
index cd2cb0c9f8..9696441a21 100644
--- a/paddle/fluid/framework/ir/is_test_pass_tester.cc
+++ b/paddle/fluid/framework/ir/is_test_pass_tester.cc
@@ -15,7 +15,10 @@
 #include "paddle/fluid/framework/ir/is_test_pass.h"
 
 #include <gtest/gtest.h>
-
+#ifdef _WIN32
+#undef FALSE
+#undef TRUE
+#endif
 namespace paddle {
 namespace framework {
 namespace ir {
diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc
index 84a0c3374c..7710ed7b61 100644
--- a/paddle/fluid/inference/analysis/analyzer_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_tester.cc
@@ -19,6 +19,7 @@
 #include "paddle/fluid/inference/analysis/ut_helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
+#include "paddle/fluid/platform/port.h"
 
 namespace paddle {
 namespace inference {
@@ -75,7 +76,7 @@ void TestWord2vecPrediction(const std::string& model_path) {
       0.000932706};
   const size_t num_elements = outputs.front().data.length() / sizeof(float);
   // The outputs' buffers are in CPU memory.
-  for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
+  for (size_t i = 0; i < std::min((size_t)5UL, num_elements); i++) {
     LOG(INFO) << "data: "
               << static_cast<float *>(outputs.front().data.data())[i];
     PADDLE_ENFORCE(static_cast<float *>(outputs.front().data.data())[i],
diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index 6f9d663121..9a393a61c4 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -15,10 +15,6 @@
 #pragma once
 
 #include <glog/logging.h>
-#if !defined(_WIN32)
-#include <sys/time.h>
-#else
-#endif
 #include <algorithm>
 #include <chrono>  // NOLINT
@@ -28,6 +24,7 @@
 #include <string>
 #include <vector>
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/string/printf.h"
 
 namespace paddle {
diff --git a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
index c4022225fd..da42688f29 100644
--- a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include <gtest/gtest.h>
-#include <sys/time.h>
 #include <algorithm>
 #include <fstream>
 #include <iostream>
diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
index cbcfc964c9..5c1204b9e6 100644
--- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc
+++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <sys/time.h>
 #include <time.h>
 #include <fstream>
 #include <thread>  // NOLINT
diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h
index 2118fcfd4b..75fa611c0d 100644
--- a/paddle/fluid/inference/tests/test_helper.h
+++ b/paddle/fluid/inference/tests/test_helper.h
@@ -20,6 +20,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/inference/io.h"
+#include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/platform/profiler.h"
 
 DECLARE_bool(use_mkldnn);
diff --git a/paddle/fluid/operators/beam_search_op_test.cc b/paddle/fluid/operators/beam_search_op_test.cc
index 501807e7f3..80fdd22fbb 100644
--- a/paddle/fluid/operators/beam_search_op_test.cc
+++ b/paddle/fluid/operators/beam_search_op_test.cc
@@ -30,23 +30,23 @@ using std::endl;
 
 void CreateInput(LoDTensor* ids, LoDTensor* scores) {
   LoD lod;
-  vector<size_t> level0({0, 2, 4});
-  vector<size_t> level1({0, 1, 2, 3, 4});
+  vector<size_t> level0{0, 2, 4};
+  vector<size_t> level1{0, 1, 2, 3, 4};
   lod.push_back(level0);
   lod.push_back(level1);
   ids->set_lod(lod);
   scores->set_lod(lod);
 
-  auto dims = framework::make_ddim(vector<int64_t>({4, 3}));
+  auto dims = framework::make_ddim(vector<int64_t>{4, 3});
   ids->Resize(dims);
   scores->Resize(dims);
 
   CPUPlace place;
   auto* ids_data = ids->mutable_data<int64_t>(place);
   auto* scores_data = scores->mutable_data<float>(place);
-  vector<int64_t> _ids({4, 2, 5, 2, 1, 3, 3, 5, 2, 8, 2, 1});
-  vector<float> _scores(
-      {0.5, 0.3, 0.2, 0.6, 0.3, 0.1, 0.9, 0.5, 0.1, 0.7, 0.5, 0.1});
+  vector<int64_t> _ids{4, 2, 5, 2, 1, 3, 3, 5, 2, 8, 2, 1};
+  vector<float> _scores{0.5f, 0.3f, 0.2f, 0.6f, 0.3f, 0.1f,
+                        0.9f, 0.5f, 0.1f, 0.7f, 0.5f, 0.1f};
 
   for (int i = 0; i < 12; i++) {
     ids_data[i] = _ids[i];
@@ -79,8 +79,8 @@ TEST(DISABLED_beam_search_op, run) {
 
   ASSERT_EQ(sids.lod(), sscores.lod());
 
-  vector<int64_t> tids({4, 2, 3, 8});
-  vector<float> tscores({0.5, 0.6, 0.9, 0.7});
+  vector<int64_t> tids{4, 2, 3, 8};
+  vector<float> tscores{0.5f, 0.6f, 0.9f, 0.7f};
 
   for (int i = 0; i < 4; i++) {
     ASSERT_EQ(tids[i], sids.data<int64_t>()[i]);
diff --git a/paddle/fluid/operators/distributed/grpc_client.cc b/paddle/fluid/operators/distributed/grpc_client.cc
index c28f86146d..3548d5d9fb 100644
--- a/paddle/fluid/operators/distributed/grpc_client.cc
+++ b/paddle/fluid/operators/distributed/grpc_client.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <sys/time.h>
 #include <limits>
 
 #include "glog/logging.h"  // For VLOG
@@ -20,6 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/distributed/grpc_client.h"
 #include "paddle/fluid/operators/distributed/grpc_serde.h"
 #include "paddle/fluid/operators/distributed/request_handler.h"
+#include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/platform/profiler.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/distributed/grpc_serde.cc b/paddle/fluid/operators/distributed/grpc_serde.cc
index f27b70a5a3..e6856676d4 100644
--- a/paddle/fluid/operators/distributed/grpc_serde.cc
+++ b/paddle/fluid/operators/distributed/grpc_serde.cc
@@ -15,7 +15,6 @@ limitations under the License. */
 #ifdef PADDLE_WITH_CUDA
 #include <nccl.h>
 #endif
-#include <sys/time.h>
 #include <thread>  // NOLINT
 
 #include "google/protobuf/io/coded_stream.h"
@@ -26,6 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/distributed/grpc_variable_response.h"
 #include "paddle/fluid/operators/distributed/proto_encoder_helper.h"
 #include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
+#include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/platform/profiler.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/distributed/grpc_serde.h b/paddle/fluid/operators/distributed/grpc_serde.h
index 7ec489e961..17290d3fb4 100644
--- a/paddle/fluid/operators/distributed/grpc_serde.h
+++ b/paddle/fluid/operators/distributed/grpc_serde.h
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-#include <sys/time.h>
+
 #include <iostream>
 #include <string>
 #include <vector>
@@ -25,6 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/framework/var_type.h"
 #include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
+#include "paddle/fluid/platform/port.h"
 
 #include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h"
 #include "paddle/fluid/operators/distributed/send_recv.pb.h"
diff --git a/paddle/fluid/operators/distributed/sendrecvop_utils.cc b/paddle/fluid/operators/distributed/sendrecvop_utils.cc
index 374fa680e3..0abebb9240 100644
--- a/paddle/fluid/operators/distributed/sendrecvop_utils.cc
+++ b/paddle/fluid/operators/distributed/sendrecvop_utils.cc
@@ -15,12 +15,12 @@ limitations under the License. */
 #ifdef PADDLE_WITH_CUDA
 #include <nccl.h>
 #endif
-#include <sys/time.h>
 #include <thread>  // NOLINT
 
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
 #include "paddle/fluid/operators/distributed/variable_response.h"
+#include "paddle/fluid/platform/port.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/distributed/sendrecvop_utils.h b/paddle/fluid/operators/distributed/sendrecvop_utils.h
index 480fc59c42..523e56fe3e 100644
--- a/paddle/fluid/operators/distributed/sendrecvop_utils.h
+++ b/paddle/fluid/operators/distributed/sendrecvop_utils.h
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-#include <sys/time.h>
 #include <iostream>
 #include <string>
 #include <vector>
@@ -24,6 +23,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/framework/var_type.h"
+#include "paddle/fluid/platform/port.h"
 
 #include "paddle/fluid/operators/distributed/send_recv.pb.h"
 
diff --git a/paddle/fluid/operators/math/cpu_vec_test.cc b/paddle/fluid/operators/math/cpu_vec_test.cc
index 18a586f8dd..ad734bae42 100644
--- a/paddle/fluid/operators/math/cpu_vec_test.cc
+++ b/paddle/fluid/operators/math/cpu_vec_test.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <sys/time.h>
 #include <cmath>
 #include <cstring>
 #include <vector>
@@ -22,6 +21,7 @@ limitations under the License. */
 
 #include "gtest/gtest.h"
 #include "paddle/fluid/operators/math/cpu_vec.h"
+#include "paddle/fluid/platform/port.h"
 
 inline double GetCurrentUS() {
   struct timeval time;
diff --git a/paddle/fluid/operators/math/im2col_test.cc b/paddle/fluid/operators/math/im2col_test.cc
index ae2c90b33a..521cd7801a 100644
--- a/paddle/fluid/operators/math/im2col_test.cc
+++ b/paddle/fluid/operators/math/im2col_test.cc
@@ -14,9 +14,9 @@ limitations under the License. */
*/ #include "paddle/fluid/operators/math/im2col.h" #include -#include #include #include "paddle/fluid/operators/math/im2col_cfo_cpu.h" +#include "paddle/fluid/platform/port.h" template void testIm2col() { diff --git a/paddle/fluid/operators/math/jit_kernel_test.cc b/paddle/fluid/operators/math/jit_kernel_test.cc index b6c62a2634..8662e1c50d 100644 --- a/paddle/fluid/operators/math/jit_kernel_test.cc +++ b/paddle/fluid/operators/math/jit_kernel_test.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/jit_kernel.h" -#include #include // for exp #include // for memcpy #include @@ -22,6 +21,7 @@ limitations under the License. */ #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" +#include "paddle/fluid/platform/port.h" #ifdef PADDLE_WITH_MKLML #include "paddle/fluid/platform/dynload/mklml.h" diff --git a/paddle/fluid/platform/cudnn_helper.h b/paddle/fluid/platform/cudnn_helper.h index 682b0c0ff3..61a25064d1 100644 --- a/paddle/fluid/platform/cudnn_helper.h +++ b/paddle/fluid/platform/cudnn_helper.h @@ -62,7 +62,7 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) { #define CUDNN_ENFORCE(condition) \ do { \ - cudnnStatus_t status = condition; \ + auto status = condition; \ if (UNLIKELY(status != CUDNN_STATUS_SUCCESS)) { \ PADDLE_THROW(::paddle::platform::cudnnGetErrorString(status)); \ } \ diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index 1a83ac7780..db62377898 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -48,13 +48,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name); #else -#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \ - struct DynLoad__##__name { \ - template \ - inline cudnnStatus_t operator()(Args... args) { \ - return ::__name(args...); \ - } \ - }; \ +#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + inline auto operator()(Args... args) { \ + return ::__name(args...); \ + } \ + }; \ extern DynLoad__##__name __name #endif diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc index c78f159ad2..e0d0051ad0 100644 --- a/paddle/fluid/platform/gpu_info.cc +++ b/paddle/fluid/platform/gpu_info.cc @@ -19,7 +19,10 @@ limitations under the License. */ #include "gflags/gflags.h" #include "paddle/fluid/platform/enforce.h" -DEFINE_double(fraction_of_gpu_memory_to_use, 0.92, +// fraction_of_gpu_memory_to_use cannot be too high on windows, +// since the win32 graphic sub-system can occupy some GPU memory +// which may lead to insufficient memory left for paddle +DEFINE_double(fraction_of_gpu_memory_to_use, 0.5, "Allocate a trunk of gpu memory that is this fraction of the " "total gpu memory size. Future memory usage will be allocated " "from the trunk. 
diff --git a/paddle/fluid/platform/stream_callback_manager.h b/paddle/fluid/platform/stream_callback_manager.h
index 11c68f3449..8dcfc4e748 100644
--- a/paddle/fluid/platform/stream_callback_manager.h
+++ b/paddle/fluid/platform/stream_callback_manager.h
@@ -18,7 +18,7 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <functional>
-#include "ThreadPool.h"
+#include <ThreadPool.h>
 #include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
diff --git a/paddle/legacy/cuda/include/hl_warpctc_wrap.h b/paddle/legacy/cuda/include/hl_warpctc_wrap.h
index 0857bd1aa1..09cbd6d450 100644
--- a/paddle/legacy/cuda/include/hl_warpctc_wrap.h
+++ b/paddle/legacy/cuda/include/hl_warpctc_wrap.h
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifndef _WIN32
 #ifndef HL_WARPCTC_WRAP_H_
 #define HL_WARPCTC_WRAP_H_
-
 #include "ctc.h"
 #include "hl_base.h"
 
@@ -91,3 +91,4 @@ extern void hl_warpctc_get_workspace_size(const int* cpuLabelLengths,
                                           size_t* bytes);
 
 #endif  // HL_WARPCTC_WRAP_H_
+#endif
diff --git a/paddle/legacy/cuda/src/hl_cuda_device.cc b/paddle/legacy/cuda/src/hl_cuda_device.cc
index 501e3b0f3b..a6e27a37ff 100644
--- a/paddle/legacy/cuda/src/hl_cuda_device.cc
+++ b/paddle/legacy/cuda/src/hl_cuda_device.cc
@@ -132,11 +132,15 @@ inline pid_t gettid() {
   uint64_t tid;
   pthread_threadid_np(NULL, &tid);
 #else
+#ifndef _WIN32
 #ifndef __NR_gettid
 #define __NR_gettid 224
 #endif
   pid_t tid = syscall(__NR_gettid);
 #endif
+#else  // _WIN32
+  pid_t tid = _getpid();
+#endif  // _WIN32
   CHECK_NE((int)tid, -1);
   return tid;
 }
diff --git a/paddle/legacy/utils/ThreadLocal.h b/paddle/legacy/utils/ThreadLocal.h
index c5b07506d3..6268b73a85 100644
--- a/paddle/legacy/utils/ThreadLocal.h
+++ b/paddle/legacy/utils/ThreadLocal.h
@@ -14,10 +14,12 @@ limitations under the License. */
 
 #pragma once
 
+#ifndef _WIN32
 #include <pthread.h>
 #include <sys/syscall.h>
-#include <sys/types.h>
 #include <unistd.h>
+#endif
+#include <sys/types.h>
 #include <map>
 #include <mutex>
 #include <random>
diff --git a/paddle/legacy/utils/Util.h b/paddle/legacy/utils/Util.h
index e6f05e30d3..3a878b2b30 100644
--- a/paddle/legacy/utils/Util.h
+++ b/paddle/legacy/utils/Util.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#ifndef _WIN32
 #include <sys/syscall.h>  // for syscall()
+#endif
 #include <sys/types.h>
 #include <algorithm>
 #include <cstdio>
@@ -40,6 +42,31 @@ inline int rand_r(unsigned int* seedp) {
 }
 #endif
 
+#ifdef _WIN32
+#define NOMINMAX  // msvc max/min macro conflict with std::min/max
+#include <windows.h>
+
+template <typename T>
+inline int __builtin_clz(const T& value) {
+  // _BitScanReverse yields the 0-based index of the highest set bit,
+  // so the leading-zero count is (bit width - 1 - index).
+  DWORD leading_zero = 0;
+  if (_BitScanReverse(&leading_zero, value)) {
+    return static_cast<int>(sizeof(T) * 8 - 1 - leading_zero);
+  } else {
+    return static_cast<int>(sizeof(T) * 8);
+  }
+}
+
+inline int __builtin_clzl(const unsigned long& value) {
+  return __builtin_clz(value);
+}
+
+inline int __builtin_clzll(const unsigned long long& value) {
+  return __builtin_clz(value);
+}
+
+#define pid_t int
+#endif
+
 /**
  * Loop over the elements in a container
 * TODO(yuyang18): It's this foreach useful? Why not use C++ 11 foreach,
diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt
index 2264481899..614596958e 100644
--- a/paddle/testing/CMakeLists.txt
+++ b/paddle/testing/CMakeLists.txt
@@ -3,8 +3,10 @@ if(WITH_TESTING)
   add_library(paddle_test_main STATIC TestMain.cpp)
   add_dependencies(paddle_test_main paddle_proto ${external_project_dependencies})
 
-  add_library(paddle_test_util STATIC TestUtil.cpp)
-  add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies})
+  if(NOT WIN32)
+    add_library(paddle_test_util STATIC TestUtil.cpp)
+    add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies})
+  endif(NOT WIN32)
 
   if(NOT MOBILE_INFERENCE)
     cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS device_context memory gtest gflags)
   endif()
diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py
index f65b37903a..829154f1b2 100644
--- a/python/paddle/fluid/metrics.py
+++ b/python/paddle/fluid/metrics.py
@@ -46,8 +46,8 @@ def _is_numpy_(var):
 
 
 def _is_number_(var):
-    return isinstance(var, int) or isinstance(var, float) or (isinstance(
-        var, np.ndarray) and var.shape == (1, ))
+    return isinstance(var, int) or isinstance(var, np.int64) or isinstance(
+        var, float) or (isinstance(var, np.ndarray) and var.shape == (1, ))
 
 
 def _is_number_or_matrix_(var):
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 510d0304f0..3fc12d584d 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -23,9 +23,11 @@ if(NOT WITH_DISTRIBUTE)
     LIST(REMOVE_ITEM TEST_OPS test_dist_text_classification)
 endif(NOT WITH_DISTRIBUTE)
 
-if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
-    LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
-endif()
+if(WITH_GPU)
+    if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
+        LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
+    endif()
+endif(WITH_GPU)
 
 list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290
 list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184
--
GitLab