提交 0a4b6fc0 编写于 作者: M minqiyang

Remove unnessesary code

test=develop
上级 53619a79
include(ExternalProject)
set(ROBIN_MAP_SOURCE_DIR ${THIRD_PARTY_PATH}/robin_map)
set(ROBIN_MAP_INCLUDE_DIR ${ROBIN_MAP_SOURCE_DIR}/src/extern_robin_map/include)
include_directories(${ROBIN_MAP_INCLUDE_DIR})
ExternalProject_Add(
extern_robin_map
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/Tessil/robin-map.git"
GIT_TAG "v0.5.0"
PREFIX ${ROBIN_MAP_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/robin_map_dummy.c)
file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
add_library(robin_map STATIC ${dummyfile})
else()
add_library(robin_map INTERFACE)
endif()
add_dependencies(robin_map extern_robin_map)
LIST(APPEND externl_project_dependencies robin_map)
...@@ -83,7 +83,7 @@ cc_test(variable_test SRCS variable_test.cc) ...@@ -83,7 +83,7 @@ cc_test(variable_test SRCS variable_test.cc)
cc_library(threadpool SRCS threadpool.cc DEPS enforce) cc_library(threadpool SRCS threadpool.cc DEPS enforce)
cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool) cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash) cc_library(scope SRCS scope.cc DEPS glog threadpool)
cc_test(scope_test SRCS scope_test.cc DEPS scope) cc_test(scope_test SRCS scope_test.cc DEPS scope)
cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor) cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)
......
...@@ -25,7 +25,7 @@ struct ExecutionStrategy { ...@@ -25,7 +25,7 @@ struct ExecutionStrategy {
size_t num_threads_{0}; size_t num_threads_{0};
bool use_cuda_{true}; bool use_cuda_{true};
bool allow_op_delay_{false}; bool allow_op_delay_{false};
size_t num_iteration_per_drop_scope_{1}; size_t num_iteration_per_drop_scope_{100};
ExecutorType type_{kDefault}; ExecutorType type_{kDefault};
bool dry_run_{false}; bool dry_run_{false};
}; };
......
...@@ -64,24 +64,21 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( ...@@ -64,24 +64,21 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
} }
platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun", nullptr); platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun", nullptr);
++drop_scope_counter_; drop_scope_counter_ += 1;
if (!fetch_tensors.empty()) { if (!fetch_tensors.empty() ||
drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) {
drop_scope_counter_ = 0;
// Wait All computational streams // Wait All computational streams
for (auto p : places_) { for (auto p : places_) {
platform::DeviceContextPool::Instance().Get(p)->Wait(); platform::DeviceContextPool::Instance().Get(p)->Wait();
} }
}
if (drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) {
drop_scope_counter_ = 0;
for (auto &scope : local_scopes_) { for (auto &scope : local_scopes_) {
auto &local_scope = auto &local_scope =
*scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>(); *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>();
scope->DeleteScope(local_scope); scope->DeleteScope(local_scope);
} }
} }
if (eptr) { if (eptr) {
std::rethrow_exception(eptr); std::rethrow_exception(eptr);
} else { } else {
......
...@@ -20,10 +20,6 @@ limitations under the License. */ ...@@ -20,10 +20,6 @@ limitations under the License. */
#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/framework/var_desc.h"
DEFINE_bool(enforce_when_check_program, true,
"Checking whether the program is correct or not. We will log "
"errors rather than throwing exceptions if this flag turned off");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace ir { namespace ir {
...@@ -48,21 +44,12 @@ void CheckProgram(const ProgramDesc &program) { ...@@ -48,21 +44,12 @@ void CheckProgram(const ProgramDesc &program) {
break; break;
case _INT(OpRole::kBackward): case _INT(OpRole::kBackward):
case _INT(OpRole::kBackward) | _INT(OpRole::kLoss): case _INT(OpRole::kBackward) | _INT(OpRole::kLoss):
if (!FLAGS_enforce_when_check_program) {
PADDLE_ENFORCE( PADDLE_ENFORCE(
visit.find(_INT(OpRole::kOptimize)) == visit.end(), visit.find(_INT(OpRole::kOptimize)) == visit.end(),
"Cannot add backward operator %s after optimize operator.", "Cannot add backward operator %s after optimize operator.",
op->Type()); op->Type());
} else {
if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
LOG(ERROR)
<< "Cannot add backward operator %s after optimize operator."
<< op->Type();
}
}
break; break;
case _INT(OpRole::kForward) | _INT(OpRole::kLoss): case _INT(OpRole::kForward) | _INT(OpRole::kLoss):
if (!FLAGS_enforce_when_check_program) {
PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) | PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) |
_INT(OpRole::kLoss)) == visit.end(), _INT(OpRole::kLoss)) == visit.end(),
"Cannot add backward|loss operator before " "Cannot add backward|loss operator before "
...@@ -72,32 +59,12 @@ void CheckProgram(const ProgramDesc &program) { ...@@ -72,32 +59,12 @@ void CheckProgram(const ProgramDesc &program) {
visit.find(_INT(OpRole::kOptimize)) == visit.end(), visit.find(_INT(OpRole::kOptimize)) == visit.end(),
"Cannot add forward|loss operator %s after optimize operator.", "Cannot add forward|loss operator %s after optimize operator.",
op->Type()); op->Type());
} else {
if (visit.find(_INT(OpRole::kBackward) | _INT(OpRole::kLoss)) !=
visit.end()) {
LOG(ERROR) << "Cannot add backward|loss operator before "
<< "forward|loss operator %s." << op->Type();
}
if (visit.find(_INT(OpRole::kOptimize)) != visit.end()) {
LOG(ERROR) << "Cannot add forward|loss operator %s after optimize "
"operator."
<< op->Type();
}
}
break; break;
case _INT(OpRole::kOptimize): case _INT(OpRole::kOptimize):
case _INT(OpRole::kOptimize) | _INT(OpRole::kLRSched): case _INT(OpRole::kOptimize) | _INT(OpRole::kLRSched):
if (!FLAGS_enforce_when_check_program) {
PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(), PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(),
"Optimize operators %s must follow backward operator.", "Optimize operators %s must follow backward operator.",
op->Type()); op->Type());
} else {
if (visit.find(_INT(OpRole::kBackward)) == visit.end()) {
LOG(ERROR) << "Optimize operators %s must follow backward operator."
<< op->Type();
}
}
break; break;
case _INT(OpRole::kLRSched): case _INT(OpRole::kLRSched):
case _INT(OpRole::kDist): case _INT(OpRole::kDist):
......
...@@ -16,8 +16,6 @@ limitations under the License. */ ...@@ -16,8 +16,6 @@ limitations under the License. */
#if !defined(_WIN32) #if !defined(_WIN32)
#include <pthread.h> #include <pthread.h>
#else
#include <mutex> // NOLINT
#endif // !_WIN32 #endif // !_WIN32
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
...@@ -31,17 +29,17 @@ struct RWLock { ...@@ -31,17 +29,17 @@ struct RWLock {
~RWLock() { pthread_rwlock_destroy(&lock_); } ~RWLock() { pthread_rwlock_destroy(&lock_); }
inline void RDLock() { void RDLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0, PADDLE_ENFORCE_EQ(pthread_rwlock_rdlock(&lock_), 0,
"acquire read lock failed"); "acquire read lock failed");
} }
inline void WRLock() { void WRLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0, PADDLE_ENFORCE_EQ(pthread_rwlock_wrlock(&lock_), 0,
"acquire write lock failed"); "acquire write lock failed");
} }
inline void UNLock() { void UNLock() {
PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed"); PADDLE_ENFORCE_EQ(pthread_rwlock_unlock(&lock_), 0, "unlock failed");
} }
...@@ -53,44 +51,81 @@ struct RWLock { ...@@ -53,44 +51,81 @@ struct RWLock {
// https://stackoverflow.com/questions/7125250/making-pthread-rwlock-wrlock-recursive // https://stackoverflow.com/questions/7125250/making-pthread-rwlock-wrlock-recursive
// In windows, rw_lock seems like a hack. Use empty object and do nothing. // In windows, rw_lock seems like a hack. Use empty object and do nothing.
struct RWLock { struct RWLock {
// FIXME(minqiyang): use mutex here to do fake lock void RDLock() {}
inline void RDLock() { mutex_.lock(); } void WRLock() {}
void UNLock() {}
inline void WRLock() { mutex_.lock(); }
inline void UNLock() { mutex_.unlock(); }
private:
std::mutex mutex_;
}; };
#endif #endif
class AutoWRLock { class RWLockGuard {
public: public:
explicit AutoWRLock(RWLock* rw_lock) : lock_(rw_lock) { Lock(); } enum Status { kUnLock, kWRLock, kRDLock };
inline void Lock() { lock_->WRLock(); } RWLockGuard(RWLock* rw_lock, Status init_status)
: lock_(rw_lock), status_(Status::kUnLock) {
inline void UnLock() { lock_->UNLock(); } switch (init_status) {
case Status::kRDLock: {
~AutoWRLock() { UnLock(); } RDLock();
break;
private: }
RWLock* lock_; case Status::kWRLock: {
}; WRLock();
break;
}
case Status::kUnLock: {
break;
}
}
}
class AutoRDLock { void WRLock() {
public: switch (status_) {
explicit AutoRDLock(RWLock* rw_lock) : lock_(rw_lock) { Lock(); } case Status::kUnLock: {
lock_->WRLock();
status_ = Status::kWRLock;
break;
}
case Status::kWRLock: {
break;
}
case Status::kRDLock: {
PADDLE_THROW(
"Please unlock read lock first before invoking write lock.");
break;
}
}
}
inline void Lock() { lock_->RDLock(); } void RDLock() {
switch (status_) {
case Status::kUnLock: {
lock_->RDLock();
status_ = Status::kRDLock;
break;
}
case Status::kRDLock: {
break;
}
case Status::kWRLock: {
PADDLE_THROW(
"Please unlock write lock first before invoking read lock.");
break;
}
}
}
inline void UnLock() { lock_->UNLock(); } void UnLock() {
if (status_ != Status::kUnLock) {
lock_->UNLock();
status_ = Status::kUnLock;
}
}
~AutoRDLock() { UnLock(); } ~RWLockGuard() { UnLock(); }
private: private:
RWLock* lock_; RWLock* lock_;
Status status_;
}; };
} // namespace framework } // namespace framework
......
...@@ -47,15 +47,9 @@ DEFINE_bool(fast_eager_deletion_mode, false, ...@@ -47,15 +47,9 @@ DEFINE_bool(fast_eager_deletion_mode, false,
// the mutex will cause serious performance issue. // the mutex will cause serious performance issue.
// So the mutex is disabled when `ON_INFER`. // So the mutex is disabled when `ON_INFER`.
#ifdef PADDLE_ON_INFERENCE #ifdef PADDLE_ON_INFERENCE
#define SCOPE_KIDS_READER_LOCK #define SCOPE_LOCK_GUARD
#define SCOPE_KIDS_WRITER_LOCK
#define SCOPE_VARS_READER_LOCK
#define SCOPE_VARS_WRITER_LOCK
#else #else
#define SCOPE_KIDS_READER_LOCK AutoRDLock auto_lock(&kids_lock_); #define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
#define SCOPE_KIDS_WRITER_LOCK AutoWRLock auto_lock(&kids_lock_);
#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
#endif #endif
namespace paddle { namespace paddle {
...@@ -73,69 +67,64 @@ bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; } ...@@ -73,69 +67,64 @@ bool IsFastEagerDeletionModeEnabled() { return FLAGS_fast_eager_deletion_mode; }
Scope::~Scope() { DropKids(); } Scope::~Scope() { DropKids(); }
Scope& Scope::NewScope() const { Scope& Scope::NewScope() const {
Scope* child = new Scope(this); SCOPE_LOCK_GUARD
{ kids_.push_back(new Scope(this));
SCOPE_KIDS_WRITER_LOCK return *kids_.back();
kids_.push_back(child);
}
return *child;
} }
Variable* Scope::Var(const std::string& name) { Variable* Scope::Var(const std::string& name) {
SCOPE_VARS_WRITER_LOCK SCOPE_LOCK_GUARD
return VarInternal(name); return VarInternal(name);
} }
Variable* Scope::Var(std::string* name) { Variable* Scope::Var(std::string* name) {
SCOPE_LOCK_GUARD
auto new_name = string::Sprintf("%p.%d", this, vars_.size()); auto new_name = string::Sprintf("%p.%d", this, vars_.size());
if (name != nullptr) { if (name != nullptr) {
*name = new_name; *name = new_name;
} }
SCOPE_VARS_WRITER_LOCK
return VarInternal(new_name); return VarInternal(new_name);
} }
Variable* Scope::FindVar(const std::string& name) const { Variable* Scope::FindVar(const std::string& name) const {
SCOPE_VARS_READER_LOCK SCOPE_LOCK_GUARD
return FindVarInternal(name); return FindVarInternal(name);
} }
Variable* Scope::FindLocalVar(const std::string& name) const { Variable* Scope::FindLocalVar(const std::string& name) const {
SCOPE_VARS_READER_LOCK SCOPE_LOCK_GUARD
return FindVarLocally(name); return FindVarLocally(name);
} }
const Scope* Scope::FindScope(const Variable* var) const { const Scope* Scope::FindScope(const Variable* var) const {
SCOPE_VARS_READER_LOCK SCOPE_LOCK_GUARD
return FindScopeInternal(var); return FindScopeInternal(var);
} }
void Scope::DropKids() { void Scope::DropKids() {
SCOPE_KIDS_WRITER_LOCK SCOPE_LOCK_GUARD
for (Scope* s : kids_) delete s; for (Scope* s : kids_) delete s;
kids_.clear(); kids_.clear();
} }
bool Scope::HasKid(const Scope* scope) const { bool Scope::HasKid(const Scope* scope) const {
SCOPE_KIDS_READER_LOCK SCOPE_LOCK_GUARD
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
return it != this->kids_.end(); return it != this->kids_.end();
} }
std::vector<std::string> Scope::LocalVarNames() const { std::vector<std::string> Scope::LocalVarNames() const {
SCOPE_LOCK_GUARD
std::vector<std::string> known_vars; std::vector<std::string> known_vars;
{
SCOPE_VARS_READER_LOCK
known_vars.reserve(this->vars_.size()); known_vars.reserve(this->vars_.size());
for (auto& p : vars_) { for (auto& p : vars_) {
known_vars.emplace_back(p.first); known_vars.emplace_back(p.first);
} }
}
return known_vars; return known_vars;
} }
void Scope::DeleteScope(Scope* scope) const { void Scope::DeleteScope(Scope* scope) const {
SCOPE_KIDS_WRITER_LOCK SCOPE_LOCK_GUARD
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope", PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope",
this, scope); this, scope);
...@@ -149,8 +138,8 @@ void Scope::DeleteScope(Scope* scope) const { ...@@ -149,8 +138,8 @@ void Scope::DeleteScope(Scope* scope) const {
} }
void Scope::EraseVars(const std::vector<std::string>& var_names) { void Scope::EraseVars(const std::vector<std::string>& var_names) {
SCOPE_LOCK_GUARD
std::set<std::string> var_set(var_names.begin(), var_names.end()); std::set<std::string> var_set(var_names.begin(), var_names.end());
SCOPE_VARS_WRITER_LOCK
for (auto it = vars_.begin(); it != vars_.end();) { for (auto it = vars_.begin(); it != vars_.end();) {
if (var_set.find(it->first) != var_set.end()) { if (var_set.find(it->first) != var_set.end()) {
it = vars_.erase(it); it = vars_.erase(it);
...@@ -162,12 +151,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) { ...@@ -162,12 +151,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void Scope::Rename(const std::string& origin_name, void Scope::Rename(const std::string& origin_name,
const std::string& new_name) const { const std::string& new_name) const {
SCOPE_VARS_WRITER_LOCK SCOPE_LOCK_GUARD
RenameInternal(origin_name, new_name); RenameInternal(origin_name, new_name);
} }
std::string Scope::Rename(const std::string& origin_name) const { std::string Scope::Rename(const std::string& origin_name) const {
SCOPE_VARS_WRITER_LOCK SCOPE_LOCK_GUARD
auto new_name = string::Sprintf("%p.%d", this, vars_.size()); auto new_name = string::Sprintf("%p.%d", this, vars_.size());
RenameInternal(origin_name, new_name); RenameInternal(origin_name, new_name);
return new_name; return new_name;
......
...@@ -14,19 +14,12 @@ limitations under the License. */ ...@@ -14,19 +14,12 @@ limitations under the License. */
#pragma once #pragma once
extern "C" {
#include <xxhash.h>
}
#include <functional>
#include <list> #include <list>
#include <memory> #include <mutex> // NOLINT
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/rw_lock.h"
#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/macros.h"
...@@ -38,14 +31,6 @@ bool IsFastEagerDeletionModeEnabled(); ...@@ -38,14 +31,6 @@ bool IsFastEagerDeletionModeEnabled();
class Scope; class Scope;
namespace inner {
struct KeyHasher {
std::size_t operator()(const std::string& key) const {
return XXH32(key.c_str(), key.size(), 1);
}
};
} // namespace inner
/** /**
* @brief Scope that manage all variables. * @brief Scope that manage all variables.
* *
...@@ -110,14 +95,7 @@ class Scope { ...@@ -110,14 +95,7 @@ class Scope {
std::string Rename(const std::string& origin_name) const; std::string Rename(const std::string& origin_name) const;
protected: protected:
mutable std::unordered_map<std::string, std::unique_ptr<Variable>, mutable std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
inner::KeyHasher>
vars_;
// mutable tsl::robin_map<
// std::string, std::unique_ptr<Variable>, std::hash<std::string>,
// std::equal_to<std::string>,
// std::allocator<std::pair<std::string, std::unique_ptr<Variable>>>, true>
// vars_;
private: private:
// Call Scope::NewScope for a sub-scope. // Call Scope::NewScope for a sub-scope.
...@@ -146,8 +124,7 @@ class Scope { ...@@ -146,8 +124,7 @@ class Scope {
DISABLE_COPY_AND_ASSIGN(Scope); DISABLE_COPY_AND_ASSIGN(Scope);
private: private:
mutable RWLock kids_lock_; mutable std::mutex mutex_;
mutable RWLock vars_lock_;
}; };
// Generate some debug string about the inherience structure of scope, quite // Generate some debug string about the inherience structure of scope, quite
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#if !defined(_WIN32)
#include <pthread.h>
#else
#include <mutex> // NOLINT
#endif // !_WIN32
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
#if !defined(_WIN32)
struct SpinLock {
SpinLock() { pthread_spin_init(&lock_, PTHREAD_PROCESS_PRIVATE); }
~SpinLock() { pthread_spin_destroy(&lock_); }
void Lock() {
PADDLE_ENFORCE_EQ(pthread_spin_lock(&lock_), 0, "acquire spin lock failed");
}
void Unlock() {
PADDLE_ENFORCE_EQ(pthread_spin_unlock(&lock_), 0,
"release spin lock failed");
}
private:
pthread_spinlock_t lock_;
};
#else
// FIXME(minqiyang): use mutex here to do fake spin lock
struct SpinLock {
void Lock() { mutex_.lock(); }
void Unlock() { mutex_.lock(); }
private:
std::mutex mutex_;
};
#endif
class AutoSpinLock {
public:
explicit SpinLockGuard(SpinLock* spin_lock) : lock_(spin_lock) {
lock_->Lock();
}
~SpinLockGuard() { lock_->Unlock(); }
private:
SpinLock* lock_;
};
} // namespace framework
} // namespace paddle
...@@ -180,7 +180,7 @@ class ConcatFunctor<platform::CUDADeviceContext, T> { ...@@ -180,7 +180,7 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
} }
// Wait() must be called because `inputs_data` may be destructed before // Wait() must be called because `inputs_data` may be destructed before
// kernel ends // kernel ends
/* context.Wait(); */ context.Wait();
} }
}; };
...@@ -258,7 +258,7 @@ class SplitFunctor<platform::CUDADeviceContext, T> { ...@@ -258,7 +258,7 @@ class SplitFunctor<platform::CUDADeviceContext, T> {
} }
// Wait() must be called because `outputs_data` may be destructed before // Wait() must be called because `outputs_data` may be destructed before
// kernel ends // kernel ends
/* context.Wait(); */ context.Wait();
} }
}; };
......
...@@ -822,7 +822,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -822,7 +822,7 @@ All parameter, weight, gradient are variables in Paddle.
R"DOC(The type is INT, num_iteration_per_drop_scope indicates how R"DOC(The type is INT, num_iteration_per_drop_scope indicates how
many iterations to clean up the temp variables which many iterations to clean up the temp variables which
is generated during execution. It may make the execution faster, is generated during execution. It may make the execution faster,
because the temp variable's shape maybe the same between two iterations. Default 1. because the temp variable's shape maybe the same between two iterations. Default 100.
NOTES: NOTES:
1. If you fetch data when calling the 'run', the ParallelExecutor 1. If you fetch data when calling the 'run', the ParallelExecutor
......
...@@ -92,7 +92,6 @@ def cuda_profiler(output_file, output_mode=None, config=None): ...@@ -92,7 +92,6 @@ def cuda_profiler(output_file, output_mode=None, config=None):
config_file = 'nvprof_config_file' config_file = 'nvprof_config_file'
with open(config_file, 'wb') as fp: with open(config_file, 'wb') as fp:
fp.writelines([six.b("%s\n" % item) for item in config]) fp.writelines([six.b("%s\n" % item) for item in config])
#Comment this for nvprof
core.nvprof_init(output_file, output_mode, config_file) core.nvprof_init(output_file, output_mode, config_file)
# Enables profiler collection by the active CUDA profiling tool. # Enables profiler collection by the active CUDA profiling tool.
core.nvprof_start() core.nvprof_start()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册