Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
c0492f25
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c0492f25
编写于
1月 09, 2019
作者:
Q
Qiyang Min
提交者:
GitHub
1月 09, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #15213 from velconia/accelerate_little_model_local_release_1_2_x
Accelerate little models
上级
19534daf
b5baca10
变更
7
隐藏空白更改
内联
并排
Showing
7 changed files
with
125 additions
and
117 deletions
+125
-117
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+1
-1
paddle/fluid/framework/details/execution_strategy.h
paddle/fluid/framework/details/execution_strategy.h
+1
-1
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
...id/framework/details/scope_buffered_ssa_graph_executor.cc
+12
-24
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
...uid/framework/details/scope_buffered_ssa_graph_executor.h
+28
-0
paddle/fluid/framework/rw_lock.h
paddle/fluid/framework/rw_lock.h
+35
-68
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+31
-20
paddle/fluid/framework/scope.h
paddle/fluid/framework/scope.h
+17
-3
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
c0492f25
...
...
@@ -80,7 +80,7 @@ cc_test(variable_test SRCS variable_test.cc)
cc_library
(
threadpool SRCS threadpool.cc DEPS enforce
)
cc_test
(
threadpool_test SRCS threadpool_test.cc DEPS threadpool
)
cc_library
(
scope SRCS scope.cc DEPS glog threadpool
)
cc_library
(
scope SRCS scope.cc DEPS glog threadpool
xxhash
)
cc_test
(
scope_test SRCS scope_test.cc DEPS scope
)
cc_library
(
data_device_transform SRCS data_device_transform.cc DEPS tensor
)
...
...
paddle/fluid/framework/details/execution_strategy.h
浏览文件 @
c0492f25
...
...
@@ -25,7 +25,7 @@ struct ExecutionStrategy {
size_t
num_threads_
{
0
};
bool
use_cuda_
{
true
};
bool
allow_op_delay_
{
false
};
size_t
num_iteration_per_drop_scope_
{
100
};
size_t
num_iteration_per_drop_scope_
{
1
};
ExecutorType
type_
{
kDefault
};
bool
dry_run_
{
false
};
};
...
...
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
浏览文件 @
c0492f25
...
...
@@ -18,9 +18,6 @@
#include <vector>
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/platform/profiler.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/framework/details/reference_count_op_handle.h"
#endif
namespace
paddle
{
namespace
framework
{
...
...
@@ -67,35 +64,26 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
}
platform
::
RecordEvent
e
(
"ScopeBufferedSSAGraphExecutorAfterRun"
,
nullptr
);
drop_scope_counter_
+=
1
;
++
drop_scope_counter_
;
#ifdef PADDLE_WITH_CUDA
const
std
::
string
gc_name
=
"garbage_collector"
;
DeviceGarbageCollectorMap
*
gc
=
Graph
().
Has
(
gc_name
)
?
&
(
Graph
().
Get
<
DeviceGarbageCollectorMap
>
(
gc_name
))
:
nullptr
;
#endif
bool
stream_end
=
false
;
if
(
!
fetch_tensors
.
empty
())
{
WaitComputationalStreams
();
stream_end
=
true
;
}
if
(
!
fetch_tensors
.
empty
()
||
drop_scope_counter_
==
strategy_
.
num_iteration_per_drop_scope_
)
{
drop_scope_counter_
=
0
;
// Wait All computational streams
for
(
auto
p
:
places_
)
{
platform
::
DeviceContextPool
::
Instance
().
Get
(
p
)
->
Wait
();
#ifdef PADDLE_WITH_CUDA
if
(
gc
!=
nullptr
&&
platform
::
is_gpu_place
(
p
))
{
auto
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
p
);
auto
&
gc_at_place
=
gc
->
at
(
gpu_place
.
device
);
gc_at_place
->
Wait
();
gc_at_place
->
Reset
();
}
#endif
if
(
drop_scope_counter_
==
strategy_
.
num_iteration_per_drop_scope_
)
{
if
(
!
stream_end
)
{
WaitComputationalStreams
();
}
for
(
auto
&
scope
:
local_scopes_
)
{
auto
&
local_scope
=
*
scope
->
Var
(
details
::
kLocalExecScopeName
)
->
GetMutable
<
Scope
*>
();
scope
->
DeleteScope
(
local_scope
);
}
drop_scope_counter_
=
0
;
}
if
(
eptr
)
{
std
::
rethrow_exception
(
eptr
);
...
...
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
浏览文件 @
c0492f25
...
...
@@ -24,6 +24,10 @@
#include "paddle/fluid/framework/details/ssa_graph_executor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/framework/details/reference_count_op_handle.h"
#endif
namespace
paddle
{
namespace
framework
{
namespace
details
{
...
...
@@ -47,6 +51,30 @@ class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor {
FeedFetchList
Run
(
const
std
::
vector
<
std
::
string
>&
fetch_tensors
)
override
;
private:
inline
void
WaitComputationalStreams
()
{
#ifdef PADDLE_WITH_CUDA
const
std
::
string
gc_name
=
"garbage_collector"
;
DeviceGarbageCollectorMap
*
gc
=
Graph
().
Has
(
gc_name
)
?
&
(
Graph
().
Get
<
DeviceGarbageCollectorMap
>
(
gc_name
))
:
nullptr
;
#endif
// Wait All computational streams
for
(
auto
p
:
places_
)
{
platform
::
DeviceContextPool
::
Instance
().
Get
(
p
)
->
Wait
();
#ifdef PADDLE_WITH_CUDA
if
(
gc
!=
nullptr
&&
platform
::
is_gpu_place
(
p
))
{
auto
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
p
);
auto
&
gc_at_place
=
gc
->
at
(
gpu_place
.
device
);
gc_at_place
->
Wait
();
gc_at_place
->
Reset
();
}
#endif
}
}
private:
size_t
drop_scope_counter_
{
0
};
...
...
paddle/fluid/framework/rw_lock.h
浏览文件 @
c0492f25
...
...
@@ -16,7 +16,9 @@ limitations under the License. */
#if !defined(_WIN32)
#include <pthread.h>
#endif // !_WIN32
#else
#include <mutex> // NOLINT
#endif // !_WIN32
#include "paddle/fluid/platform/enforce.h"
...
...
@@ -29,17 +31,17 @@ struct RWLock {
~
RWLock
()
{
pthread_rwlock_destroy
(
&
lock_
);
}
void
RDLock
()
{
inline
void
RDLock
()
{
PADDLE_ENFORCE_EQ
(
pthread_rwlock_rdlock
(
&
lock_
),
0
,
"acquire read lock failed"
);
}
void
WRLock
()
{
inline
void
WRLock
()
{
PADDLE_ENFORCE_EQ
(
pthread_rwlock_wrlock
(
&
lock_
),
0
,
"acquire write lock failed"
);
}
void
UNLock
()
{
inline
void
UNLock
()
{
PADDLE_ENFORCE_EQ
(
pthread_rwlock_unlock
(
&
lock_
),
0
,
"unlock failed"
);
}
...
...
@@ -51,81 +53,46 @@ struct RWLock {
// https://stackoverflow.com/questions/7125250/making-pthread-rwlock-wrlock-recursive
// On Windows, rw_lock seems like a hack. Use an empty object and do nothing.
struct
RWLock
{
void
RDLock
()
{}
void
WRLock
()
{}
void
UNLock
()
{}
// FIXME(minqiyang): use mutex here to do fake lock
inline
void
RDLock
()
{
mutex_
.
lock
();
}
inline
void
WRLock
()
{
mutex_
.
lock
();
}
inline
void
UNLock
()
{
mutex_
.
unlock
();
}
private:
std
::
mutex
mutex_
;
};
#endif
class
RWLockGuard
{
class
AutoWRLock
{
public:
enum
Status
{
kUnLock
,
kWRLock
,
kRDLock
};
RWLockGuard
(
RWLock
*
rw_lock
,
Status
init_status
)
:
lock_
(
rw_lock
),
status_
(
Status
::
kUnLock
)
{
switch
(
init_status
)
{
case
Status
::
kRDLock
:
{
RDLock
();
break
;
}
case
Status
::
kWRLock
:
{
WRLock
();
break
;
}
case
Status
::
kUnLock
:
{
break
;
}
}
}
explicit
AutoWRLock
(
RWLock
*
rw_lock
)
:
lock_
(
rw_lock
)
{
Lock
();
}
void
WRLock
()
{
switch
(
status_
)
{
case
Status
::
kUnLock
:
{
lock_
->
WRLock
();
status_
=
Status
::
kWRLock
;
break
;
}
case
Status
::
kWRLock
:
{
break
;
}
case
Status
::
kRDLock
:
{
PADDLE_THROW
(
"Please unlock read lock first before invoking write lock."
);
break
;
}
}
}
~
AutoWRLock
()
{
UnLock
();
}
void
RDLock
()
{
switch
(
status_
)
{
case
Status
::
kUnLock
:
{
lock_
->
RDLock
();
status_
=
Status
::
kRDLock
;
break
;
}
case
Status
::
kRDLock
:
{
break
;
}
case
Status
::
kWRLock
:
{
PADDLE_THROW
(
"Please unlock write lock first before invoking read lock."
);
break
;
}
}
}
private:
inline
void
Lock
()
{
lock_
->
WRLock
();
}
void
UnLock
()
{
if
(
status_
!=
Status
::
kUnLock
)
{
lock_
->
UNLock
();
status_
=
Status
::
kUnLock
;
}
}
inline
void
UnLock
()
{
lock_
->
UNLock
();
}
private:
RWLock
*
lock_
;
};
class
AutoRDLock
{
public:
explicit
AutoRDLock
(
RWLock
*
rw_lock
)
:
lock_
(
rw_lock
)
{
Lock
();
}
~
AutoRDLock
()
{
UnLock
();
}
private:
inline
void
Lock
()
{
lock_
->
RDLock
();
}
~
RWLockGuard
()
{
UnLock
();
}
inline
void
UnLock
()
{
lock_
->
UNLock
();
}
private:
RWLock
*
lock_
;
Status
status_
;
};
}
// namespace framework
...
...
paddle/fluid/framework/scope.cc
浏览文件 @
c0492f25
...
...
@@ -43,9 +43,15 @@ DEFINE_double(
// the mutex will cause serious performance issue.
// So the mutex is disabled when `ON_INFER`.
#ifdef PADDLE_ON_INFERENCE
#define SCOPE_LOCK_GUARD
#define SCOPE_KIDS_READER_LOCK
#define SCOPE_KIDS_WRITER_LOCK
#define SCOPE_VARS_READER_LOCK
#define SCOPE_VARS_WRITER_LOCK
#else
#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
#define SCOPE_KIDS_READER_LOCK AutoRDLock auto_lock(&kids_lock_);
#define SCOPE_KIDS_WRITER_LOCK AutoWRLock auto_lock(&kids_lock_);
#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
#endif
namespace
paddle
{
...
...
@@ -61,64 +67,69 @@ int64_t GetEagerDeletionThreshold() {
Scope
::~
Scope
()
{
DropKids
();
}
Scope
&
Scope
::
NewScope
()
const
{
SCOPE_LOCK_GUARD
kids_
.
push_back
(
new
Scope
(
this
));
return
*
kids_
.
back
();
Scope
*
child
=
new
Scope
(
this
);
{
SCOPE_KIDS_WRITER_LOCK
kids_
.
push_back
(
child
);
}
return
*
child
;
}
Variable
*
Scope
::
Var
(
const
std
::
string
&
name
)
{
SCOPE_LOCK_GUARD
SCOPE_VARS_WRITER_LOCK
return
VarInternal
(
name
);
}
Variable
*
Scope
::
Var
(
std
::
string
*
name
)
{
SCOPE_LOCK_GUARD
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
if
(
name
!=
nullptr
)
{
*
name
=
new_name
;
}
SCOPE_VARS_WRITER_LOCK
return
VarInternal
(
new_name
);
}
Variable
*
Scope
::
FindVar
(
const
std
::
string
&
name
)
const
{
SCOPE_LOCK_GUARD
SCOPE_VARS_READER_LOCK
return
FindVarInternal
(
name
);
}
Variable
*
Scope
::
FindLocalVar
(
const
std
::
string
&
name
)
const
{
SCOPE_LOCK_GUARD
SCOPE_VARS_READER_LOCK
return
FindVarLocally
(
name
);
}
const
Scope
*
Scope
::
FindScope
(
const
Variable
*
var
)
const
{
SCOPE_LOCK_GUARD
SCOPE_VARS_READER_LOCK
return
FindScopeInternal
(
var
);
}
void
Scope
::
DropKids
()
{
SCOPE_LOCK_GUARD
SCOPE_KIDS_WRITER_LOCK
for
(
Scope
*
s
:
kids_
)
delete
s
;
kids_
.
clear
();
}
bool
Scope
::
HasKid
(
const
Scope
*
scope
)
const
{
SCOPE_LOCK_GUARD
SCOPE_KIDS_READER_LOCK
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
return
it
!=
this
->
kids_
.
end
();
}
std
::
vector
<
std
::
string
>
Scope
::
LocalVarNames
()
const
{
SCOPE_LOCK_GUARD
std
::
vector
<
std
::
string
>
known_vars
;
known_vars
.
reserve
(
this
->
vars_
.
size
());
for
(
auto
&
p
:
vars_
)
{
known_vars
.
emplace_back
(
p
.
first
);
{
SCOPE_VARS_READER_LOCK
known_vars
.
reserve
(
this
->
vars_
.
size
());
for
(
auto
&
p
:
vars_
)
{
known_vars
.
emplace_back
(
p
.
first
);
}
}
return
known_vars
;
}
void
Scope
::
DeleteScope
(
Scope
*
scope
)
const
{
SCOPE_LOCK_GUARD
SCOPE_KIDS_WRITER_LOCK
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
PADDLE_ENFORCE
(
it
!=
this
->
kids_
.
end
(),
"%p Cannot find %p as kid scope"
,
this
,
scope
);
...
...
@@ -132,8 +143,8 @@ void Scope::DeleteScope(Scope* scope) const {
}
void
Scope
::
EraseVars
(
const
std
::
vector
<
std
::
string
>&
var_names
)
{
SCOPE_LOCK_GUARD
std
::
set
<
std
::
string
>
var_set
(
var_names
.
begin
(),
var_names
.
end
());
SCOPE_VARS_WRITER_LOCK
for
(
auto
it
=
vars_
.
begin
();
it
!=
vars_
.
end
();)
{
if
(
var_set
.
find
(
it
->
first
)
!=
var_set
.
end
())
{
it
=
vars_
.
erase
(
it
);
...
...
@@ -145,12 +156,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void
Scope
::
Rename
(
const
std
::
string
&
origin_name
,
const
std
::
string
&
new_name
)
const
{
SCOPE_LOCK_GUARD
SCOPE_VARS_WRITER_LOCK
RenameInternal
(
origin_name
,
new_name
);
}
std
::
string
Scope
::
Rename
(
const
std
::
string
&
origin_name
)
const
{
SCOPE_LOCK_GUARD
SCOPE_VARS_WRITER_LOCK
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
RenameInternal
(
origin_name
,
new_name
);
return
new_name
;
...
...
paddle/fluid/framework/scope.h
浏览文件 @
c0492f25
...
...
@@ -14,12 +14,18 @@ limitations under the License. */
#pragma once
extern
"C"
{
#include <xxhash.h>
}
#include <list>
#include <mutex> // NOLINT
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/rw_lock.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/macros.h"
...
...
@@ -94,7 +100,14 @@ class Scope {
std
::
string
Rename
(
const
std
::
string
&
origin_name
)
const
;
protected:
mutable
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
Variable
>>
vars_
;
struct
KeyHasher
{
std
::
size_t
operator
()(
const
std
::
string
&
key
)
const
{
return
XXH32
(
key
.
c_str
(),
key
.
size
(),
1
);
}
};
mutable
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
Variable
>
,
KeyHasher
>
vars_
;
private:
// Call Scope::NewScope for a sub-scope.
...
...
@@ -123,7 +136,8 @@ class Scope {
DISABLE_COPY_AND_ASSIGN
(
Scope
);
private:
mutable
std
::
mutex
mutex_
;
mutable
RWLock
kids_lock_
;
mutable
RWLock
vars_lock_
;
};
// Generate some debug string about the inheritance structure of scope, quite
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录