提交 0a4b6fc0 编写于 作者: M minqiyang

Remove unnessesary code

test=develop
上级 53619a79
include(ExternalProject)
set(ROBIN_MAP_SOURCE_DIR ${THIRD_PARTY_PATH}/robin_map)
set(ROBIN_MAP_INCLUDE_DIR ${ROBIN_MAP_SOURCE_DIR}/src/extern_robin_map/include)
include_directories(${ROBIN_MAP_INCLUDE_DIR})
ExternalProject_Add(
extern_robin_map
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/Tessil/robin-map.git"
GIT_TAG "v0.5.0"
PREFIX ${ROBIN_MAP_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/robin_map_dummy.c)
file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
add_library(robin_map STATIC ${dummyfile})
else()
add_library(robin_map INTERFACE)
endif()
add_dependencies(robin_map extern_robin_map)
LIST(APPEND externl_project_dependencies robin_map)
......@@ -83,7 +83,7 @@ cc_test(variable_test SRCS variable_test.cc)
cc_library(threadpool SRCS threadpool.cc DEPS enforce)
cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash)
cc_library(scope SRCS scope.cc DEPS glog threadpool)
cc_test(scope_test SRCS scope_test.cc DEPS scope)
cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)
......
......@@ -25,7 +25,7 @@ struct ExecutionStrategy {
size_t num_threads_{0};
bool use_cuda_{true};
bool allow_op_delay_{false};
size_t num_iteration_per_drop_scope_{1};
size_t num_iteration_per_drop_scope_{100};
ExecutorType type_{kDefault};
bool dry_run_{false};
};
......
......@@ -64,24 +64,21 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
}
platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun", nullptr);
++drop_scope_counter_;
drop_scope_counter_ += 1;
if (!fetch_tensors.empty()) {
if (!fetch_tensors.empty() ||
drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) {
drop_scope_counter_ = 0;
// Wait All computational streams
for (auto p : places_) {
platform::DeviceContextPool::Instance().Get(p)->Wait();
}
}
if (drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) {
drop_scope_counter_ = 0;
for (auto &scope : local_scopes_) {
auto &local_scope =
*scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>();
scope->DeleteScope(local_scope);
}
}
if (eptr) {
std::rethrow_exception(eptr);
} else {
......
......@@ -20,10 +20,6 @@ limitations under the License. */
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/var_desc.h"
DEFINE_bool(enforce_when_check_program, true,
"Checking whether the program is correct or not. We will log "
"errors rather than throwing exceptions if this flag turned off");
namespace paddle {
namespace framework {
namespace ir {
......@@ -48,21 +44,12 @@ void CheckProgram(const ProgramDesc &program) {
break;
case _INT(OpRole::kBackward):
case _INT(OpRole::kBackward) | _INT(OpRole::kLoss):
if (!FLAGS_enforce_when_check_program) {
PADDLE_ENFORCE(
visit.find(_INT(OpRole::kOptimize)) == visit.end(),
"Cannot add backward operator %s after optimize operator.",
op->Type());
} else {
if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
LOG(ERROR)
<< "Cannot add backward operator %s after optimize operator."
<< op->Type();
}
}
break;
case _INT(OpRole::kForward) | _INT(OpRole::kLoss):
if (!FLAGS_enforce_when_check_program) {
PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) |
_INT(OpRole::kLoss)) == visit.end(),
"Cannot add backward|loss operator before "
......@@ -72,32 +59,12 @@ void CheckProgram(const ProgramDesc &program) {
visit.find(_INT(OpRole::kOptimize)) == visit.end(),
"Cannot add forward|loss operator %s after optimize operator.",
op->Type());
} else {
if (visit.find(_INT(OpRole::kBackward) | _INT(OpRole::kLoss)) !=
visit.end()) {
LOG(ERROR) << "Cannot add backward|loss operator before "
<< "forward|loss operator %s." << op->Type();
}
if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
LOG(ERROR) << "Cannot add forward|loss operator %s after optimize "
"operator."
<< op->Type();
}
}
break;
case _INT(OpRole::kOptimize):
case _INT(OpRole::kOptimize) | _INT(OpRole::kLRSched):
if (!FLAGS_enforce_when_check_program) {
PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(),
"Optimize operators %s must follow backward operator.",
op->Type());
} else {
if (visit.find(_INT(OpRole::kBackward)) == visit.end()) {
LOG(ERROR) << "Optimize operators %s must follow backward operator."
<< op->Type();
}
}
break;
case _INT(OpRole::kLRSched):
case _INT(OpRole::kDist):
......
......@@ -16,8 +16,6 @@ limitations under the License. */
#if !defined(_WIN32)
#include <pthread.h>
#else
#include <mutex> // NOLINT
#endif // !_WIN32
#include "paddle/fluid/platform/enforce.h"
......@@ -31,17 +29,17 @@ struct RWLock {
~RWLock() { pthread_rwlock_destroy(&lock_); }
inline void RDLock() {
void RDLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0,
"acquire read lock failed");
}
inline void WRLock() {
void WRLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0,
"acquire write lock failed");
}
inline void UNLock() {
void UNLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed");
}
......@@ -53,44 +51,81 @@ struct RWLock {
// https://stackoverflow.com/questions/7125250/making-pthread-rwlock-wrlock-recursive
// In windows, rw_lock seems like a hack. Use empty object and do nothing.
struct RWLock {
// FIXME(minqiyang): use mutex here to do fake lock
inline void RDLock() { mutex_.lock(); }
inline void WRLock() { mutex_.lock(); }
inline void UNLock() { mutex_.unlock(); }
private:
std::mutex mutex_;
void RDLock() {}
void WRLock() {}
void UNLock() {}
};
#endif
class AutoWRLock {
class RWLockGuard {
public:
explicit AutoWRLock(RWLock* rw_lock) : lock_(rw_lock) { Lock(); }
inline void Lock() { lock_->WRLock(); }
inline void UnLock() { lock_->UNLock(); }
~AutoWRLock() { UnLock(); }
private:
RWLock* lock_;
};
enum Status { kUnLock, kWRLock, kRDLock };
RWLockGuard(RWLock* rw_lock, Status init_status)
: lock_(rw_lock), status_(Status::kUnLock) {
switch (init_status) {
case Status::kRDLock: {
RDLock();
break;
}
case Status::kWRLock: {
WRLock();
break;
}
case Status::kUnLock: {
break;
}
}
}
class AutoRDLock {
public:
explicit AutoRDLock(RWLock* rw_lock) : lock_(rw_lock) { Lock(); }
void WRLock() {
switch (status_) {
case Status::kUnLock: {
lock_->WRLock();
status_ = Status::kWRLock;
break;
}
case Status::kWRLock: {
break;
}
case Status::kRDLock: {
PADDLE_THROW(
"Please unlock read lock first before invoking write lock.");
break;
}
}
}
inline void Lock() { lock_->RDLock(); }
void RDLock() {
switch (status_) {
case Status::kUnLock: {
lock_->RDLock();
status_ = Status::kRDLock;
break;
}
case Status::kRDLock: {
break;
}
case Status::kWRLock: {
PADDLE_THROW(
"Please unlock write lock first before invoking read lock.");
break;
}
}
}
inline void UnLock() { lock_->UNLock(); }
void UnLock() {
if (status_ != Status::kUnLock) {
lock_->UNLock();
status_ = Status::kUnLock;
}
}
~AutoRDLock() { UnLock(); }
~RWLockGuard() { UnLock(); }
private:
RWLock* lock_;
Status status_;
};
} // namespace framework
......
......@@ -47,15 +47,9 @@ DEFINE_bool(fast_eager_deletion_mode, false,
// the mutex will cause serious performance issue.
// So the mutex is disabled when `ON_INFER`.
#ifdef PADDLE_ON_INFERENCE
#define SCOPE_KIDS_READER_LOCK
#define SCOPE_KIDS_WRITER_LOCK
#define SCOPE_VARS_READER_LOCK
#define SCOPE_VARS_WRITER_LOCK
#define SCOPE_LOCK_GUARD
#else
#define SCOPE_KIDS_READER_LOCK AutoRDLock auto_lock(&kids_lock_);
#define SCOPE_KIDS_WRITER_LOCK AutoWRLock auto_lock(&kids_lock_);
#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
#endif
namespace paddle {
......@@ -73,69 +67,64 @@ bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; }
Scope::~Scope() { DropKids(); }
Scope& Scope::NewScope() const {
Scope* child = new Scope(this);
{
SCOPE_KIDS_WRITER_LOCK
kids_.push_back(child);
}
return *child;
SCOPE_LOCK_GUARD
kids_.push_back(new Scope(this));
return *kids_.back();
}
Variable* Scope::Var(const std::string& name) {
SCOPE_VARS_WRITER_LOCK
SCOPE_LOCK_GUARD
return VarInternal(name);
}
Variable* Scope::Var(std::string* name) {
SCOPE_LOCK_GUARD
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
if (name != nullptr) {
*name = new_name;
}
SCOPE_VARS_WRITER_LOCK
return VarInternal(new_name);
}
Variable* Scope::FindVar(const std::string& name) const {
SCOPE_VARS_READER_LOCK
SCOPE_LOCK_GUARD
return FindVarInternal(name);
}
Variable* Scope::FindLocalVar(const std::string& name) const {
SCOPE_VARS_READER_LOCK
SCOPE_LOCK_GUARD
return FindVarLocally(name);
}
const Scope* Scope::FindScope(const Variable* var) const {
SCOPE_VARS_READER_LOCK
SCOPE_LOCK_GUARD
return FindScopeInternal(var);
}
void Scope::DropKids() {
SCOPE_KIDS_WRITER_LOCK
SCOPE_LOCK_GUARD
for (Scope* s : kids_) delete s;
kids_.clear();
}
bool Scope::HasKid(const Scope* scope) const {
SCOPE_KIDS_READER_LOCK
SCOPE_LOCK_GUARD
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
return it != this->kids_.end();
}
std::vector<std::string> Scope::LocalVarNames() const {
SCOPE_LOCK_GUARD
std::vector<std::string> known_vars;
{
SCOPE_VARS_READER_LOCK
known_vars.reserve(this->vars_.size());
for (auto& p : vars_) {
known_vars.emplace_back(p.first);
}
}
return known_vars;
}
void Scope::DeleteScope(Scope* scope) const {
SCOPE_KIDS_WRITER_LOCK
SCOPE_LOCK_GUARD
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope",
this, scope);
......@@ -149,8 +138,8 @@ void Scope::DeleteScope(Scope* scope) const {
}
void Scope::EraseVars(const std::vector<std::string>& var_names) {
SCOPE_LOCK_GUARD
std::set<std::string> var_set(var_names.begin(), var_names.end());
SCOPE_VARS_WRITER_LOCK
for (auto it = vars_.begin(); it != vars_.end();) {
if (var_set.find(it->first) != var_set.end()) {
it = vars_.erase(it);
......@@ -162,12 +151,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void Scope::Rename(const std::string& origin_name,
const std::string& new_name) const {
SCOPE_VARS_WRITER_LOCK
SCOPE_LOCK_GUARD
RenameInternal(origin_name, new_name);
}
std::string Scope::Rename(const std::string& origin_name) const {
SCOPE_VARS_WRITER_LOCK
SCOPE_LOCK_GUARD
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
RenameInternal(origin_name, new_name);
return new_name;
......
......@@ -14,19 +14,12 @@ limitations under the License. */
#pragma once
extern "C" {
#include <xxhash.h>
}
#include <functional>
#include <list>
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/rw_lock.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/macros.h"
......@@ -38,14 +31,6 @@ bool IsFastEagerDeletionModeEnabled();
class Scope;
namespace inner {
struct KeyHasher {
std::size_t operator()(const std::string& key) const {
return XXH32(key.c_str(), key.size(), 1);
}
};
} // namespace inner
/**
* @brief Scope that manage all variables.
*
......@@ -110,14 +95,7 @@ class Scope {
std::string Rename(const std::string& origin_name) const;
protected:
mutable std::unordered_map<std::string, std::unique_ptr<Variable>,
inner::KeyHasher>
vars_;
// mutable tsl::robin_map<
// std::string, std::unique_ptr<Variable>, std::hash<std::string>,
// std::equal_to<std::string>,
// std::allocator<std::pair<std::string, std::unique_ptr<Variable>>>, true>
// vars_;
mutable std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
private:
// Call Scope::NewScope for a sub-scope.
......@@ -146,8 +124,7 @@ class Scope {
DISABLE_COPY_AND_ASSIGN(Scope);
private:
mutable RWLock kids_lock_;
mutable RWLock vars_lock_;
mutable std::mutex mutex_;
};
// Generate some debug string about the inherience structure of scope, quite
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#if !defined(_WIN32)
#include <pthread.h>
#else
#include <mutex> // NOLINT
#endif // !_WIN32
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
#if !defined(_WIN32)
struct SpinLock {
SpinLock() { pthread_spin_init(&lock_, PTHREAD_PROCESS_PRIVATE); }
~SpinLock() { pthread_spin_destroy(&lock_); }
void Lock() {
PADDLE_ENFORCE_EQ(pthread_spin_lock(&lock_), 0, "acquire spin lock failed");
}
void Unlock() {
PADDLE_ENFORCE_EQ(pthread_spin_unlock(&lock_), 0,
"release spin lock failed");
}
private:
pthread_spinlock_t lock_;
};
#else
// FIXME(minqiyang): use mutex here to do fake spin lock
struct SpinLock {
void Lock() { mutex_.lock(); }
void Unlock() { mutex_.lock(); }
private:
std::mutex mutex_;
};
#endif
class AutoSpinLock {
public:
explicit SpinLockGuard(SpinLock* spin_lock) : lock_(spin_lock) {
lock_->Lock();
}
~SpinLockGuard() { lock_->Unlock(); }
private:
SpinLock* lock_;
};
} // namespace framework
} // namespace paddle
......@@ -180,7 +180,7 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
}
// Wait() must be called because `inputs_data` may be destructed before
// kernel ends
/* context.Wait(); */
context.Wait();
}
};
......@@ -258,7 +258,7 @@ class SplitFunctor<platform::CUDADeviceContext, T> {
}
// Wait() must be called because `outputs_data` may be destructed before
// kernel ends
/* context.Wait(); */
context.Wait();
}
};
......
......@@ -822,7 +822,7 @@ All parameter, weight, gradient are variables in Paddle.
R"DOC(The type is INT, num_iteration_per_drop_scope indicates how
many iterations to clean up the temp variables which
is generated during execution. It may make the execution faster,
because the temp variable's shape maybe the same between two iterations. Default 1.
because the temp variable's shape maybe the same between two iterations. Default 100.
NOTES:
1. If you fetch data when calling the 'run', the ParallelExecutor
......
......@@ -92,7 +92,6 @@ def cuda_profiler(output_file, output_mode=None, config=None):
config_file = 'nvprof_config_file'
with open(config_file, 'wb') as fp:
fp.writelines([six.b("%s\n" % item) for item in config])
#Comment this for nvprof
core.nvprof_init(output_file, output_mode, config_file)
# Enables profiler collection by the active CUDA profiling tool.
core.nvprof_start()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册