diff --git a/CMakeLists.txt b/CMakeLists.txt index 7fda5d460d54e1db5cbaba45fd73ceb6ef5a7180..c31f51a3f7371bd7b1b0ca3234091a35868806ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -294,7 +294,7 @@ if(WITH_PSLIB) list(APPEND EXTERNAL_LIBS pslib_brpc) list(APPEND EXTERNAL_LIBS libmct) endif(WITH_PSLIB) - + if(WITH_AMD_GPU) find_package(HIP) include(hip) diff --git a/cmake/external/robin_map.cmake b/cmake/external/robin_map.cmake deleted file mode 100644 index ddaf59536cb2c493cf77f689e0e33a03e2e59ec4..0000000000000000000000000000000000000000 --- a/cmake/external/robin_map.cmake +++ /dev/null @@ -1,31 +0,0 @@ -include(ExternalProject) - -set(ROBIN_MAP_SOURCE_DIR ${THIRD_PARTY_PATH}/robin_map) -set(ROBIN_MAP_INCLUDE_DIR ${ROBIN_MAP_SOURCE_DIR}/src/extern_robin_map/include) - -include_directories(${ROBIN_MAP_INCLUDE_DIR}) - -ExternalProject_Add( - extern_robin_map - ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "https://github.com/Tessil/robin-map.git" - GIT_TAG "v0.5.0" - PREFIX ${ROBIN_MAP_SOURCE_DIR} - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" -) - -if(${CMAKE_VERSION} VERSION_LESS "3.3.0") - set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/robin_map_dummy.c) - file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";") - add_library(robin_map STATIC ${dummyfile}) -else() - add_library(robin_map INTERFACE) -endif() - -add_dependencies(robin_map extern_robin_map) - -LIST(APPEND externl_project_dependencies robin_map) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 10a637af445c45baaa013a2a1217c6a23f794638..412bc9cbe88b860a698d17e239d7b94d8956b781 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -83,7 +83,7 @@ cc_test(variable_test SRCS variable_test.cc) cc_library(threadpool SRCS threadpool.cc DEPS enforce) cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool) -cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash) +cc_library(scope SRCS scope.cc DEPS glog threadpool) cc_test(scope_test SRCS scope_test.cc DEPS scope) cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor) diff --git a/paddle/fluid/framework/details/execution_strategy.h b/paddle/fluid/framework/details/execution_strategy.h index 37b07e5736312b3050debe745f2d3c108469c5d6..15c496130c2b6c7643ff96661be09e5ac4870344 100644 --- a/paddle/fluid/framework/details/execution_strategy.h +++ b/paddle/fluid/framework/details/execution_strategy.h @@ -25,7 +25,7 @@ struct ExecutionStrategy { size_t num_threads_{0}; bool use_cuda_{true}; bool allow_op_delay_{false}; - size_t num_iteration_per_drop_scope_{1}; + size_t num_iteration_per_drop_scope_{100}; ExecutorType type_{kDefault}; bool dry_run_{false}; }; diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc index ea783c609081cd4a3b2deb83737939e250492361..57f6fc66c57e2a53d9cf30d7761626a50bc379ea 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc @@ -64,24 +64,21 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( } platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun", nullptr); - ++drop_scope_counter_; + drop_scope_counter_ += 1; - if (!fetch_tensors.empty()) { + if (!fetch_tensors.empty() || + drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) { + drop_scope_counter_ = 0; // Wait All computational streams for (auto p : places_) { platform::DeviceContextPool::Instance().Get(p)->Wait(); } - } - - if (drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) { - drop_scope_counter_ = 0; for (auto &scope : local_scopes_) { auto &local_scope = *scope->Var(details::kLocalExecScopeName)->GetMutable(); scope->DeleteScope(local_scope); } } - if (eptr) { std::rethrow_exception(eptr); } else { diff --git a/paddle/fluid/framework/ir/graph.cc b/paddle/fluid/framework/ir/graph.cc index 8e67f8f6104cdaa8227c34351c6355ab8fc81b09..8670dcfed7e40473e06cd12cecc1157dd4f54aa0 100644 --- a/paddle/fluid/framework/ir/graph.cc +++ b/paddle/fluid/framework/ir/graph.cc @@ -20,10 +20,6 @@ limitations under the License. */ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/var_desc.h" -DEFINE_bool(enforce_when_check_program, true, - "Checking whether the program is correct or not. We will log " - "errors rather than throwing exceptions if this flag turned off"); - namespace paddle { namespace framework { namespace ir { @@ -48,56 +44,27 @@ void CheckProgram(const ProgramDesc &program) { break; case _INT(OpRole::kBackward): case _INT(OpRole::kBackward) | _INT(OpRole::kLoss): - if (!FLAGS_enforce_when_check_program) { - PADDLE_ENFORCE( - visit.find(_INT(OpRole::kOptimize)) == visit.end(), - "Cannot add backward operator %s after optimize operator.", - op->Type()); - } else { - if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) { - LOG(ERROR) - << "Cannot add backward operator %s after optimize operator." - << op->Type(); - } - } + PADDLE_ENFORCE( + visit.find(_INT(OpRole::kOptimize)) == visit.end(), + "Cannot add backward operator %s after optimize operator.", + op->Type()); break; case _INT(OpRole::kForward) | _INT(OpRole::kLoss): - if (!FLAGS_enforce_when_check_program) { - PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) | - _INT(OpRole::kLoss)) == visit.end(), - "Cannot add backward|loss operator before " - "forward|loss operator %s.", - op->Type()); - PADDLE_ENFORCE( - visit.find(_INT(OpRole::kOptimize)) == visit.end(), - "Cannot add forward|loss operator %s after optimize operator.", - op->Type()); - } else { - if (visit.find(_INT(OpRole::kBackward) | _INT(OpRole::kLoss)) != - visit.end()) { - LOG(ERROR) << "Cannot add backward|loss operator before " - << "forward|loss operator %s." << op->Type(); - } - - if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) { - LOG(ERROR) << "Cannot add forward|loss operator %s after optimize " - "operator." - << op->Type(); - } - } + PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) | + _INT(OpRole::kLoss)) == visit.end(), + "Cannot add backward|loss operator before " + "forward|loss operator %s.", + op->Type()); + PADDLE_ENFORCE( + visit.find(_INT(OpRole::kOptimize)) == visit.end(), + "Cannot add forward|loss operator %s after optimize operator.", + op->Type()); break; case _INT(OpRole::kOptimize): case _INT(OpRole::kOptimize) | _INT(OpRole::kLRSched): - if (!FLAGS_enforce_when_check_program) { - PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(), - "Optimize operators %s must follow backward operator.", - op->Type()); - } else { - if (visit.find(_INT(OpRole::kBackward)) == visit.end()) { - LOG(ERROR) << "Optimize operators %s must follow backward operator." - << op->Type(); - } - } + PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(), + "Optimize operators %s must follow backward operator.", + op->Type()); break; case _INT(OpRole::kLRSched): case _INT(OpRole::kDist): diff --git a/paddle/fluid/framework/rw_lock.h b/paddle/fluid/framework/rw_lock.h index 75e6bef9bf33d438b7b0ee7f6df8d3fa6732fcfa..dbf00f3a79f7d1dcf97b346fccfdb68f119d4aa3 100644 --- a/paddle/fluid/framework/rw_lock.h +++ b/paddle/fluid/framework/rw_lock.h @@ -16,9 +16,7 @@ limitations under the License. */ #if !defined(_WIN32) #include -#else -#include // NOLINT -#endif // !_WIN32 +#endif // !_WIN32 #include "paddle/fluid/platform/enforce.h" @@ -31,17 +29,17 @@ struct RWLock { ~RWLock() { pthread_rwlock_destroy(&lock_); } - inline void RDLock() { + void RDLock() { PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0, "acquire read lock failed"); } - inline void WRLock() { + void WRLock() { PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0, "acquire write lock failed"); } - inline void UNLock() { + void UNLock() { PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed"); } @@ -53,44 +51,81 @@ struct RWLock { // https://stackoverflow.com/questions/7125250/making-pthread-rwlock-wrlock-recursive // In windows, rw_lock seems like a hack. Use empty object and do nothing. struct RWLock { - // FIXME(minqiyang): use mutex here to do fake lock - inline void RDLock() { mutex_.lock(); } - - inline void WRLock() { mutex_.lock(); } - - inline void UNLock() { mutex_.unlock(); } - - private: - std::mutex mutex_; + void RDLock() {} + void WRLock() {} + void UNLock() {} }; #endif -class AutoWRLock { +class RWLockGuard { public: - explicit AutoWRLock(RWLock* rw_lock) : lock_(rw_lock) { Lock(); } - - inline void Lock() { lock_->WRLock(); } - - inline void UnLock() { lock_->UNLock(); } - - ~AutoWRLock() { UnLock(); } - - private: - RWLock* lock_; -}; + enum Status { kUnLock, kWRLock, kRDLock }; + + RWLockGuard(RWLock* rw_lock, Status init_status) + : lock_(rw_lock), status_(Status::kUnLock) { + switch (init_status) { + case Status::kRDLock: { + RDLock(); + break; + } + case Status::kWRLock: { + WRLock(); + break; + } + case Status::kUnLock: { + break; + } + } + } -class AutoRDLock { - public: - explicit AutoRDLock(RWLock* rw_lock) : lock_(rw_lock) { Lock(); } + void WRLock() { + switch (status_) { + case Status::kUnLock: { + lock_->WRLock(); + status_ = Status::kWRLock; + break; + } + case Status::kWRLock: { + break; + } + case Status::kRDLock: { + PADDLE_THROW( + "Please unlock read lock first before invoking write lock."); + break; + } + } + } - inline void Lock() { lock_->RDLock(); } + void RDLock() { + switch (status_) { + case Status::kUnLock: { + lock_->RDLock(); + status_ = Status::kRDLock; + break; + } + case Status::kRDLock: { + break; + } + case Status::kWRLock: { + PADDLE_THROW( + "Please unlock write lock first before invoking read lock."); + break; + } + } + } - inline void UnLock() { lock_->UNLock(); } + void UnLock() { + if (status_ != Status::kUnLock) { + lock_->UNLock(); + status_ = Status::kUnLock; + } + } - ~AutoRDLock() { UnLock(); } + ~RWLockGuard() { UnLock(); } private: RWLock* lock_; + Status status_; }; } // namespace framework diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index 4f79d9826099e8cb19b6a21a7723c75baa1745f9..6fa5e99f9f3a7e871f1a742a30803853988ea6eb 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -47,15 +47,9 @@ DEFINE_bool(fast_eager_deletion_mode, false, // the mutex will cause serious performance issue. // So the mutex is disabled when `ON_INFER`. #ifdef PADDLE_ON_INFERENCE -#define SCOPE_KIDS_READER_LOCK -#define SCOPE_KIDS_WRITER_LOCK -#define SCOPE_VARS_READER_LOCK -#define SCOPE_VARS_WRITER_LOCK +#define SCOPE_LOCK_GUARD #else -#define SCOPE_KIDS_READER_LOCK AutoRDLock auto_lock(&kids_lock_); -#define SCOPE_KIDS_WRITER_LOCK AutoWRLock auto_lock(&kids_lock_); -#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_); -#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_); +#define SCOPE_LOCK_GUARD std::lock_guard lock(mutex_); #endif namespace paddle { @@ -73,69 +67,64 @@ bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; } Scope::~Scope() { DropKids(); } Scope& Scope::NewScope() const { - Scope* child = new Scope(this); - { - SCOPE_KIDS_WRITER_LOCK - kids_.push_back(child); - } - return *child; + SCOPE_LOCK_GUARD + kids_.push_back(new Scope(this)); + return *kids_.back(); } Variable* Scope::Var(const std::string& name) { - SCOPE_VARS_WRITER_LOCK + SCOPE_LOCK_GUARD return VarInternal(name); } Variable* Scope::Var(std::string* name) { + SCOPE_LOCK_GUARD auto new_name = string::Sprintf("%p.%d", this, vars_.size()); if (name != nullptr) { *name = new_name; } - SCOPE_VARS_WRITER_LOCK return VarInternal(new_name); } Variable* Scope::FindVar(const std::string& name) const { - SCOPE_VARS_READER_LOCK + SCOPE_LOCK_GUARD return FindVarInternal(name); } Variable* Scope::FindLocalVar(const std::string& name) const { - SCOPE_VARS_READER_LOCK + SCOPE_LOCK_GUARD return FindVarLocally(name); } const Scope* Scope::FindScope(const Variable* var) const { - SCOPE_VARS_READER_LOCK + SCOPE_LOCK_GUARD return FindScopeInternal(var); } void Scope::DropKids() { - SCOPE_KIDS_WRITER_LOCK + SCOPE_LOCK_GUARD for (Scope* s : kids_) delete s; kids_.clear(); } bool Scope::HasKid(const Scope* scope) const { - SCOPE_KIDS_READER_LOCK + SCOPE_LOCK_GUARD auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); return it != this->kids_.end(); } std::vector Scope::LocalVarNames() const { + SCOPE_LOCK_GUARD std::vector known_vars; - { - SCOPE_VARS_READER_LOCK - known_vars.reserve(this->vars_.size()); - for (auto& p : vars_) { - known_vars.emplace_back(p.first); - } + known_vars.reserve(this->vars_.size()); + for (auto& p : vars_) { + known_vars.emplace_back(p.first); } return known_vars; } void Scope::DeleteScope(Scope* scope) const { - SCOPE_KIDS_WRITER_LOCK + SCOPE_LOCK_GUARD auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope", this, scope); @@ -149,8 +138,8 @@ void Scope::DeleteScope(Scope* scope) const { } void Scope::EraseVars(const std::vector& var_names) { + SCOPE_LOCK_GUARD std::set var_set(var_names.begin(), var_names.end()); - SCOPE_VARS_WRITER_LOCK for (auto it = vars_.begin(); it != vars_.end();) { if (var_set.find(it->first) != var_set.end()) { it = vars_.erase(it); @@ -162,12 +151,12 @@ void Scope::EraseVars(const std::vector& var_names) { void Scope::Rename(const std::string& origin_name, const std::string& new_name) const { - SCOPE_VARS_WRITER_LOCK + SCOPE_LOCK_GUARD RenameInternal(origin_name, new_name); } std::string Scope::Rename(const std::string& origin_name) const { - SCOPE_VARS_WRITER_LOCK + SCOPE_LOCK_GUARD auto new_name = string::Sprintf("%p.%d", this, vars_.size()); RenameInternal(origin_name, new_name); return new_name; diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index 77ef18414d09cf8e42a7f2a6570c8cd1ef496287..aded1f771cedbf2442ad36d7fab3e6e6caffdc24 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -14,19 +14,12 @@ limitations under the License. */ #pragma once -extern "C" { -#include -} - -#include #include -#include +#include // NOLINT #include #include -#include #include -#include "paddle/fluid/framework/rw_lock.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/macros.h" @@ -38,14 +31,6 @@ bool IsFastEagerDeletionModeEnabled(); class Scope; -namespace inner { -struct KeyHasher { - std::size_t operator()(const std::string& key) const { - return XXH32(key.c_str(), key.size(), 1); - } -}; -} // namespace inner - /** * @brief Scope that manage all variables. * @@ -110,14 +95,7 @@ class Scope { std::string Rename(const std::string& origin_name) const; protected: - mutable std::unordered_map, - inner::KeyHasher> - vars_; - // mutable tsl::robin_map< - // std::string, std::unique_ptr, std::hash, - // std::equal_to, - // std::allocator>>, true> - // vars_; + mutable std::unordered_map> vars_; private: // Call Scope::NewScope for a sub-scope. @@ -146,8 +124,7 @@ class Scope { DISABLE_COPY_AND_ASSIGN(Scope); private: - mutable RWLock kids_lock_; - mutable RWLock vars_lock_; + mutable std::mutex mutex_; }; // Generate some debug string about the inherience structure of scope, quite diff --git a/paddle/fluid/framework/spin_lock.h b/paddle/fluid/framework/spin_lock.h deleted file mode 100644 index 11a763d655abb0f422cb2647a820c80e79f33f1b..0000000000000000000000000000000000000000 --- a/paddle/fluid/framework/spin_lock.h +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#if !defined(_WIN32) -#include -#else -#include // NOLINT -#endif // !_WIN32 - -#include "paddle/fluid/platform/enforce.h" - -namespace paddle { -namespace framework { - -#if !defined(_WIN32) -struct SpinLock { - SpinLock() { pthread_spin_init(&lock_, PTHREAD_PROCESS_PRIVATE); } - - ~SpinLock() { pthread_spin_destroy(&lock_); } - - void Lock() { - PADDLE_ENFORCE_EQ(pthread_spin_lock(&lock_), 0, "acquire spin lock failed"); - } - - void Unlock() { - PADDLE_ENFORCE_EQ(pthread_spin_unlock(&lock_), 0, - "release spin lock failed"); - } - - private: - pthread_spinlock_t lock_; -}; -#else -// FIXME(minqiyang): use mutex here to do fake spin lock -struct SpinLock { - void Lock() { mutex_.lock(); } - - void Unlock() { mutex_.lock(); } - - private: - std::mutex mutex_; -}; -#endif - -class AutoSpinLock { - public: - explicit SpinLockGuard(SpinLock* spin_lock) : lock_(spin_lock) { - lock_->Lock(); - } - - ~SpinLockGuard() { lock_->Unlock(); } - - private: - SpinLock* lock_; -}; - -} // namespace framework -} // namespace paddle diff --git a/paddle/fluid/operators/math/concat_and_split.cu b/paddle/fluid/operators/math/concat_and_split.cu index 930d851696efdf8dc92f98638f2d09cac7eb304e..760a065c1081d1e55901774b258ba524471b856b 100644 --- a/paddle/fluid/operators/math/concat_and_split.cu +++ b/paddle/fluid/operators/math/concat_and_split.cu @@ -180,7 +180,7 @@ class ConcatFunctor { } // Wait() must be called because `inputs_data` may be destructed before // kernel ends - /* context.Wait(); */ + context.Wait(); } }; @@ -258,7 +258,7 @@ class SplitFunctor { } // Wait() must be called because `outputs_data` may be destructed before // kernel ends - /* context.Wait(); */ + context.Wait(); } }; diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index c108c8275669feeb4c313dc57107adfc632ce61d..88a2a5276ab52e25b4f790e3a2f1386ed0715b4e 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -822,7 +822,7 @@ All parameter, weight, gradient are variables in Paddle. R"DOC(The type is INT, num_iteration_per_drop_scope indicates how many iterations to clean up the temp variables which is generated during execution. It may make the execution faster, - because the temp variable's shape maybe the same between two iterations. Default 1. + because the temp variable's shape maybe the same between two iterations. Default 100. NOTES: 1. If you fetch data when calling the 'run', the ParallelExecutor diff --git a/python/paddle/fluid/profiler.py b/python/paddle/fluid/profiler.py index 78f7a6ac085f676d96c26c16fe9f48ae7f1d4d58..e05885f5f5bfc169828c1c6e723dffff098c3c2e 100644 --- a/python/paddle/fluid/profiler.py +++ b/python/paddle/fluid/profiler.py @@ -92,7 +92,6 @@ def cuda_profiler(output_file, output_mode=None, config=None): config_file = 'nvprof_config_file' with open(config_file, 'wb') as fp: fp.writelines([six.b("%s\n" % item) for item in config]) - #Comment this for nvprof core.nvprof_init(output_file, output_mode, config_file) # Enables profiler collection by the active CUDA profiling tool. core.nvprof_start()