Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
b6c8701e
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b6c8701e
编写于
6月 06, 2018
作者:
Y
yuyang18
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into feature/extract_tensor
上级
fc9f2d28
106ee9d1
变更
24
隐藏空白更改
内联
并排
Showing
24 changed file
with
183 addition
and
82 deletion
+183
-82
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+1
-1
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+1
-0
paddle/fluid/framework/details/execution_strategy.h
paddle/fluid/framework/details/execution_strategy.h
+1
-0
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
...id/framework/details/scope_buffered_ssa_graph_executor.cc
+76
-0
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
...uid/framework/details/scope_buffered_ssa_graph_executor.h
+53
-0
paddle/fluid/framework/details/ssa_graph_executor.cc
paddle/fluid/framework/details/ssa_graph_executor.cc
+0
-4
paddle/fluid/framework/details/ssa_graph_executor.h
paddle/fluid/framework/details/ssa_graph_executor.h
+1
-5
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
...le/fluid/framework/details/threaded_ssa_graph_executor.cc
+4
-2
paddle/fluid/framework/details/threaded_ssa_graph_executor.h
paddle/fluid/framework/details/threaded_ssa_graph_executor.h
+1
-0
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+16
-42
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+6
-12
paddle/fluid/framework/scope.h
paddle/fluid/framework/scope.h
+6
-1
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+0
-1
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+0
-3
paddle/fluid/platform/dynload/cublas.h
paddle/fluid/platform/dynload/cublas.h
+1
-1
paddle/fluid/platform/dynload/cudnn.h
paddle/fluid/platform/dynload/cudnn.h
+1
-1
paddle/fluid/platform/dynload/cupti.h
paddle/fluid/platform/dynload/cupti.h
+1
-1
paddle/fluid/platform/dynload/curand.h
paddle/fluid/platform/dynload/curand.h
+1
-1
paddle/fluid/platform/dynload/nccl.h
paddle/fluid/platform/dynload/nccl.h
+1
-1
paddle/fluid/platform/dynload/tensorrt.h
paddle/fluid/platform/dynload/tensorrt.h
+1
-1
paddle/fluid/platform/dynload/warpctc.h
paddle/fluid/platform/dynload/warpctc.h
+1
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+8
-0
python/paddle/fluid/tests/no_test_concurrency.py
python/paddle/fluid/tests/no_test_concurrency.py
+0
-0
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+2
-4
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
b6c8701e
...
...
@@ -87,7 +87,7 @@ cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
framework_proto glog lod_rank_table feed_fetch_method
)
cc_library
(
parallel_executor SRCS parallel_executor.cc DEPS multi_devices_graph_builder threaded_ssa_graph_executor
)
cc_library
(
parallel_executor SRCS parallel_executor.cc DEPS multi_devices_graph_builder threaded_ssa_graph_executor
scope_buffered_ssa_graph_executor
)
cc_library
(
prune SRCS prune.cc DEPS framework_proto
)
cc_test
(
prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context
)
...
...
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
b6c8701e
...
...
@@ -36,5 +36,6 @@ cc_test(broadcast_op_test SRCS broadcast_op_handle_test.cc DEPS var_handle op_ha
device_context broadcast_op_handle
)
cc_test
(
gather_op_test SRCS gather_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
device_context gather_op_handle
)
cc_library
(
scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_executor.cc DEPS ssa_graph_executor
)
#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
# device_context reduce_op_handle )
paddle/fluid/framework/details/execution_strategy.h
浏览文件 @
b6c8701e
...
...
@@ -22,6 +22,7 @@ struct ExecutionStrategy {
size_t
num_threads_
{
0
};
bool
use_event_
{
true
};
bool
allow_op_delay_
{
false
};
size_t
num_iteration_per_drop_scope_
{
100
};
};
}
// namespace details
...
...
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
0 → 100644
浏览文件 @
b6c8701e
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include <string>
#include <vector>
#include "paddle/fluid/framework/executor.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
/// Builds an executor that wraps `underlying_executor` and manages the
/// per-scope local execution scopes on its behalf.
///
/// All by-value arguments are sinks: they are moved into the corresponding
/// members. Ownership of `underlying_executor` is transferred to this object.
///
/// @param strategy            Execution options; `Run` reads
///                            `num_iteration_per_drop_scope_` from it.
/// @param local_scopes        Scopes under which `Run` creates local scopes.
/// @param var_infos           Variables `Run` creates in those scopes.
/// @param places              Places whose device contexts `Run` waits on.
/// @param underlying_executor Executor that `Run` delegates to.
ScopeBufferedSSAGraphExecutor::ScopeBufferedSSAGraphExecutor(
    ExecutionStrategy strategy, std::vector<Scope *> local_scopes,
    std::vector<VariableInfo> var_infos, std::vector<platform::Place> places,
    std::unique_ptr<SSAGraphExecutor> &&underlying_executor)
    // Initializers are listed in member declaration order.
    : strategy_(std::move(strategy)),
      underlying_executor_(std::move(underlying_executor)),
      local_scopes_(std::move(local_scopes)),
      var_infos_(std::move(var_infos)),
      places_(std::move(places)) {}
/// Runs one iteration on the wrapped executor, creating the local execution
/// scopes on demand and dropping them only periodically.
///
/// Local scopes are (re)created whenever `drop_scope_counter_` is zero, i.e.
/// on the first call and on the call following a drop. They are dropped when
/// the caller fetched something, or once
/// `strategy_.num_iteration_per_drop_scope_` iterations have run since the
/// last drop.
///
/// @param fetch_tensors Names forwarded unchanged to the underlying executor.
/// @return Whatever the underlying executor's `Run` returned.
FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
    const std::vector<std::string> &fetch_tensors) {
  if (drop_scope_counter_ == 0) {
    // Create local scopes. The scopes are visited in reverse order, as in the
    // code this was extracted from.
    for (auto it = local_scopes_.rbegin(); it != local_scopes_.rend(); ++it) {
      auto &scope = *it;
      Scope &local_scope = scope->NewScope();
      // Publish the new local scope through a well-known variable so it can be
      // looked up again when it is time to delete it (see below).
      *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>() =
          &local_scope;

      for (const auto &info : var_infos_) {
        // Skip variables that are already visible from `scope`.
        if (scope->FindVar(info.name_) != nullptr) {
          continue;
        }
        // Persistable variables are created in `scope` itself; all others are
        // created in the droppable local scope.
        auto *var = info.persistable_ ? scope->Var(info.name_)
                                      : local_scope.Var(info.name_);
        InitializeVariable(var, info.type_);
      }
    }
  }

  auto fetch_data = underlying_executor_->Run(fetch_tensors);
  drop_scope_counter_ += 1;

  // `>=` rather than `==`: the counter is already at least 1 here, so with a
  // configured value of 0 an equality test would never fire and the local
  // scopes would never be dropped on non-fetching iterations. For any value
  // >= 1 the two comparisons behave identically.
  const bool reached_drop_interval =
      drop_scope_counter_ >= strategy_.num_iteration_per_drop_scope_;
  if (!fetch_tensors.empty() || reached_drop_interval) {
    drop_scope_counter_ = 0;

    // Wait for all computational streams before deleting the local scopes.
    for (const auto &p : places_) {
      platform::DeviceContextPool::Instance().Get(p)->Wait();
    }

    for (auto &scope : local_scopes_) {
      auto &local_scope =
          *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>();
      scope->DeleteScope(local_scope);
    }
  }
  return fetch_data;
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h
0 → 100644
浏览文件 @
b6c8701e
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/execution_strategy.h"
#include "paddle/fluid/framework/details/ssa_graph_executor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
/// Plain description of one variable to be created in an execution scope.
/// Members carry no default initializers; callers are expected to fill in
/// every field.
struct VariableInfo {
  /// Name under which the variable is looked up / created in a scope.
  std::string name_;
  /// Type passed to `InitializeVariable` when the variable is created.
  proto::VarType::Type type_;
  /// When true the variable is created in the parent scope rather than in the
  /// periodically dropped local scope.
  bool persistable_;
};
/// SSAGraphExecutor decorator that keeps the local execution scopes alive
/// across several iterations instead of recreating them on every `Run`.
///
/// The actual graph execution is delegated to the wrapped executor.
class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor {
 public:
  /// Takes ownership of `underlying_executor`; the remaining arguments are
  /// moved into the corresponding members.
  ScopeBufferedSSAGraphExecutor(
      ExecutionStrategy strategy, std::vector<Scope*> local_scopes,
      std::vector<VariableInfo> var_infos, std::vector<platform::Place> places,
      std::unique_ptr<SSAGraphExecutor>&& underlying_executor);

  /// Runs one iteration on the wrapped executor and returns its result.
  FeedFetchList Run(const std::vector<std::string>& fetch_tensors) override;

 private:
  // NOTE: the declaration order below is also the initialization order used
  // by the constructor; keep the two in sync.

  /// Iterations run since the local scopes were last dropped.
  size_t drop_scope_counter_{0};

  /// Execution options (notably `num_iteration_per_drop_scope_`).
  ExecutionStrategy strategy_;
  /// Executor that performs the real work.
  std::unique_ptr<SSAGraphExecutor> underlying_executor_;
  /// Scopes under which the local scopes are created (not owned here, as far
  /// as this header shows — they are stored as raw pointers).
  std::vector<Scope*> local_scopes_;
  /// Variables to create whenever local scopes are rebuilt.
  std::vector<VariableInfo> var_infos_;
  /// Places whose device contexts are waited on before scopes are dropped.
  std::vector<platform::Place> places_;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/ssa_graph_executor.cc
浏览文件 @
b6c8701e
...
...
@@ -17,10 +17,6 @@
namespace
paddle
{
namespace
framework
{
namespace
details
{
SSAGraphExecutor
::
SSAGraphExecutor
(
std
::
unique_ptr
<
SSAGraph
>
&&
graph
)
:
graph_
(
std
::
move
(
graph
))
{}
SSAGraphExecutor
::~
SSAGraphExecutor
()
{}
}
// namespace details
...
...
paddle/fluid/framework/details/ssa_graph_executor.h
浏览文件 @
b6c8701e
...
...
@@ -28,15 +28,11 @@ class SSAGraphExecutor {
DISABLE_COPY_AND_ASSIGN
(
SSAGraphExecutor
);
public:
// Steal graph inside
explicit
SSAGraphExecutor
(
std
::
unique_ptr
<
SSAGraph
>
&&
graph
);
SSAGraphExecutor
()
{}
virtual
~
SSAGraphExecutor
();
virtual
FeedFetchList
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
)
=
0
;
protected:
std
::
unique_ptr
<
SSAGraph
>
graph_
;
};
}
// namespace details
}
// namespace framework
...
...
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
浏览文件 @
b6c8701e
...
...
@@ -21,7 +21,7 @@ ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor(
const
ExecutionStrategy
&
strategy
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
std
::
unique_ptr
<
SSAGraph
>
&&
graph
)
:
SSAGraphExecutor
(
std
::
move
(
graph
)),
:
graph_
(
std
::
move
(
graph
)),
pool_
(
strategy
.
num_threads_
>=
2
?
new
::
ThreadPool
(
strategy
.
num_threads_
)
:
nullptr
),
local_scopes_
(
local_scopes
),
...
...
@@ -189,7 +189,9 @@ void ThreadedSSAGraphExecutor::RunOp(
BlockingQueue
<
VarHandleBase
*>
*
ready_var_q
,
details
::
OpHandleBase
*
op
)
{
auto
op_run
=
[
ready_var_q
,
op
,
this
]
{
try
{
VLOG
(
10
)
<<
op
<<
" "
<<
op
->
Name
()
<<
" : "
<<
op
->
DebugString
();
if
(
VLOG_IS_ON
(
10
))
{
VLOG
(
10
)
<<
op
<<
" "
<<
op
->
Name
()
<<
" : "
<<
op
->
DebugString
();
}
op
->
Run
(
strategy_
.
use_event_
);
VLOG
(
10
)
<<
op
<<
" "
<<
op
->
Name
()
<<
" Done "
;
running_ops_
--
;
...
...
paddle/fluid/framework/details/threaded_ssa_graph_executor.h
浏览文件 @
b6c8701e
...
...
@@ -51,6 +51,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
details
::
OpHandleBase
*
op
);
private:
std
::
unique_ptr
<
SSAGraph
>
graph_
;
std
::
unique_ptr
<::
ThreadPool
>
pool_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
platform
::
Place
>
places_
;
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
b6c8701e
...
...
@@ -23,6 +23,7 @@ limitations under the License. */
#endif
#include "paddle/fluid/framework/details/multi_devices_graph_builder.h"
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/platform/profiler.h"
...
...
@@ -42,8 +43,6 @@ class ParallelExecutorPrivate {
#ifdef PADDLE_WITH_CUDA
std
::
unique_ptr
<
platform
::
NCCLContextMap
>
nccl_ctxs_
;
#endif
std
::
vector
<
std
::
tuple
<
std
::
string
,
proto
::
VarType
::
Type
,
bool
>>
var_types_
;
bool
own_local_scope
;
};
...
...
@@ -92,9 +91,18 @@ ParallelExecutor::ParallelExecutor(
local_scopes
.
empty
())
{
// Is CUDA
BCastParamsToGPUs
(
bcast_vars
);
}
// Startup Program has been run. All local scopes has correct parameters.
// Startup Program has been run. All local scopes has correct parameters.
// Step 2. Create vars in each scope;
std
::
vector
<
details
::
VariableInfo
>
var_infos
;
for
(
auto
*
var
:
main_program
.
Block
(
0
).
AllVars
())
{
var_infos
.
emplace_back
();
var_infos
.
back
().
name_
=
var
->
Name
();
var_infos
.
back
().
type_
=
var
->
GetType
();
var_infos
.
back
().
persistable_
=
var
->
Persistable
();
}
// Step
2
. Convert main_program to SSA form and dependency graph. Also, insert
// Step
3
. Convert main_program to SSA form and dependency graph. Also, insert
// ncclOp
#ifdef PADDLE_WITH_CUDA
details
::
MultiDevSSAGraphBuilder
builder
(
...
...
@@ -105,16 +113,15 @@ ParallelExecutor::ParallelExecutor(
params
,
member_
->
local_scopes_
,
build_strategy
);
#endif
auto
graph
=
builder
.
Build
(
main_program
);
member_
->
executor_
.
reset
(
new
details
::
ThreadedSSAGraphExecutor
(
exec_strategy
,
member_
->
local_scopes_
,
places
,
std
::
move
(
graph
)));
// Step 3. Create vars in each scope;
for
(
auto
*
var
:
main_program
.
Block
(
0
).
AllVars
())
{
member_
->
var_types_
.
emplace_back
(
var
->
Name
(),
var
->
GetType
(),
var
->
Persistable
());
}
member_
->
executor_
.
reset
(
new
details
::
ScopeBufferedSSAGraphExecutor
(
exec_strategy
,
member_
->
local_scopes_
,
std
::
move
(
var_infos
),
member_
->
places_
,
std
::
move
(
member_
->
executor_
)));
}
void
ParallelExecutor
::
BCastParamsToGPUs
(
...
...
@@ -169,42 +176,9 @@ void ParallelExecutor::BCastParamsToGPUs(
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
)
{
platform
::
RecordBlock
b
(
0
);
// Create local scopes.
for
(
auto
it
=
member_
->
local_scopes_
.
rbegin
();
it
!=
member_
->
local_scopes_
.
rend
();
++
it
)
{
auto
&
scope
=
*
it
;
Scope
&
local_scope
=
scope
->
NewScope
();
*
scope
->
Var
(
details
::
kLocalExecScopeName
)
->
GetMutable
<
Scope
*>
()
=
&
local_scope
;
for
(
auto
&
name_type_pair
:
member_
->
var_types_
)
{
if
(
scope
->
FindVar
(
std
::
get
<
0
>
(
name_type_pair
))
!=
nullptr
)
{
continue
;
}
if
(
std
::
get
<
2
>
(
name_type_pair
))
{
// Persistable
InitializeVariable
(
scope
->
Var
(
std
::
get
<
0
>
(
name_type_pair
)),
std
::
get
<
1
>
(
name_type_pair
));
}
else
{
InitializeVariable
(
local_scope
.
Var
(
std
::
get
<
0
>
(
name_type_pair
)),
std
::
get
<
1
>
(
name_type_pair
));
}
}
}
auto
fetch_data
=
member_
->
executor_
->
Run
(
fetch_tensors
);
*
member_
->
global_scope_
->
Var
(
fetched_var_name
)
->
GetMutable
<
FeedFetchList
>
()
=
fetch_data
;
// Wait All computational streams
for
(
auto
p
:
member_
->
places_
)
{
platform
::
DeviceContextPool
::
Instance
().
Get
(
p
)
->
Wait
();
}
for
(
auto
&
scope
:
member_
->
local_scopes_
)
{
auto
&
local_scope
=
*
scope
->
Var
(
details
::
kLocalExecScopeName
)
->
GetMutable
<
Scope
*>
();
scope
->
DeleteScope
(
local_scope
);
}
}
void
ParallelExecutor
::
FeedTensorsIntoLocalScopes
(
...
...
paddle/fluid/framework/scope.cc
浏览文件 @
b6c8701e
...
...
@@ -34,13 +34,7 @@ DEFINE_bool(
namespace
paddle
{
namespace
framework
{
Scope
::~
Scope
()
{
DropKids
();
for
(
auto
&
kv
:
vars_
)
{
VLOG
(
3
)
<<
"Destroy variable "
<<
kv
.
first
;
delete
kv
.
second
;
}
}
Scope
::~
Scope
()
{
DropKids
();
}
Scope
&
Scope
::
NewScope
()
const
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
...
...
@@ -51,8 +45,9 @@ Scope& Scope::NewScope() const {
Variable
*
Scope
::
Var
(
const
std
::
string
&
name
)
{
auto
*
v
=
FindVarLocally
(
name
);
if
(
v
!=
nullptr
)
return
v
;
v
=
new
Variable
();
vars_
[
name
]
=
v
;
vars_
[
name
]
.
reset
(
v
)
;
VLOG
(
3
)
<<
"Create variable "
<<
name
;
v
->
name_
=
&
(
vars_
.
find
(
name
)
->
first
);
return
v
;
...
...
@@ -76,7 +71,7 @@ Variable* Scope::FindVar(const std::string& name) const {
const
Scope
*
Scope
::
FindScope
(
const
Variable
*
var
)
const
{
for
(
auto
&
kv
:
vars_
)
{
if
(
kv
.
second
==
var
)
{
if
(
kv
.
second
.
get
()
==
var
)
{
return
this
;
}
}
...
...
@@ -113,7 +108,6 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
std
::
set
<
std
::
string
>
var_set
(
var_names
.
begin
(),
var_names
.
end
());
for
(
auto
it
=
vars_
.
begin
();
it
!=
vars_
.
end
();)
{
if
(
var_set
.
find
(
it
->
first
)
!=
var_set
.
end
())
{
delete
it
->
second
;
it
=
vars_
.
erase
(
it
);
}
else
{
++
it
;
...
...
@@ -129,7 +123,7 @@ void Scope::Rename(const std::string& origin_name,
auto
new_it
=
vars_
.
find
(
new_name
);
PADDLE_ENFORCE
(
new_it
==
vars_
.
end
(),
"The variable with name %s is already in the scope"
,
new_name
);
vars_
[
new_name
]
=
origin_it
->
second
;
vars_
[
new_name
]
.
reset
(
origin_it
->
second
.
release
())
;
vars_
.
erase
(
origin_it
);
}
...
...
@@ -141,7 +135,7 @@ std::string Scope::Rename(const std::string& origin_name) const {
Variable
*
Scope
::
FindVarLocally
(
const
std
::
string
&
name
)
const
{
auto
it
=
vars_
.
find
(
name
);
if
(
it
!=
vars_
.
end
())
return
it
->
second
;
if
(
it
!=
vars_
.
end
())
return
it
->
second
.
get
()
;
return
nullptr
;
}
...
...
paddle/fluid/framework/scope.h
浏览文件 @
b6c8701e
...
...
@@ -47,15 +47,18 @@ class Scope {
Scope
&
NewScope
()
const
;
/// Create a variable with given name if it doesn't exist.
/// Caller doesn't own the returned Variable.
Variable
*
Var
(
const
std
::
string
&
name
);
/// Create a variable with a scope-unique name.
/// Caller doesn't own the returned Variable.
Variable
*
Var
(
std
::
string
*
name
=
nullptr
);
void
EraseVars
(
const
std
::
vector
<
std
::
string
>&
var_names
);
/// Find a variable in the scope or any of its ancestors. Returns
/// nullptr if cannot find.
/// Caller doesn't own the returned Variable.
Variable
*
FindVar
(
const
std
::
string
&
name
)
const
;
const
Scope
*
parent
()
const
{
return
parent_
;
}
...
...
@@ -78,13 +81,15 @@ class Scope {
// Rename variable to a new name and return the new name
std
::
string
Rename
(
const
std
::
string
&
origin_name
)
const
;
/// Caller doesn't own the returned Variable.
Variable
*
FindVarLocally
(
const
std
::
string
&
name
)
const
;
private:
// Call Scope::NewScope for a sub-scope.
explicit
Scope
(
Scope
const
*
parent
)
:
parent_
(
parent
)
{}
mutable
std
::
unordered_map
<
std
::
string
,
Variable
*>
vars_
;
mutable
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
Variable
>>
vars_
;
// Scope in `kids_` are owned by this class.
mutable
std
::
list
<
Scope
*>
kids_
;
Scope
const
*
parent_
{
nullptr
};
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
b6c8701e
...
...
@@ -175,7 +175,6 @@ CUDADeviceContext::~CUDADeviceContext() {
Place
CUDADeviceContext
::
GetPlace
()
const
{
return
place_
;
}
void
CUDADeviceContext
::
Wait
()
const
{
std
::
lock_guard
<
std
::
recursive_mutex
>
guard
(
mutex_
);
PADDLE_ENFORCE
(
cudaStreamSynchronize
(
stream_
));
PADDLE_ENFORCE
(
cudaGetLastError
());
}
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
b6c8701e
...
...
@@ -100,7 +100,6 @@ class CUDADeviceContext : public DeviceContext {
template
<
typename
Callback
>
void
RecordEvent
(
cudaEvent_t
ev
,
Callback
callback
)
{
std
::
lock_guard
<
std
::
recursive_mutex
>
guard
(
mutex_
);
callback
();
PADDLE_ENFORCE
(
cudaEventRecord
(
ev
,
stream_
));
}
...
...
@@ -110,8 +109,6 @@ class CUDADeviceContext : public DeviceContext {
std
::
unique_ptr
<
Eigen
::
GpuDevice
>
eigen_device_
;
std
::
unique_ptr
<
EigenCudaStreamDevice
>
eigen_stream_
;
mutable
std
::
recursive_mutex
mutex_
;
cudaStream_t
stream_
;
cudnnHandle_t
cudnn_handle_
;
cublasHandle_t
cublas_handle_
;
...
...
paddle/fluid/platform/dynload/cublas.h
浏览文件 @
b6c8701e
...
...
@@ -45,7 +45,7 @@ extern void *cublas_dso_handle;
std::call_once(cublas_dso_flag, []() { \
cublas_dso_handle = paddle::platform::dynload::GetCublasDsoHandle(); \
}); \
void *p_##__name = dlsym(cublas_dso_handle, #__name);
\
static void *p_##__name = dlsym(cublas_dso_handle, #__name);
\
return reinterpret_cast<FUNC_TYPE>(p_##__name)(args...); \
} \
}; \
...
...
paddle/fluid/platform/dynload/cudnn.h
浏览文件 @
b6c8701e
...
...
@@ -39,7 +39,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
}); \
EnforceCUDNNLoaded(#__name); \
void* p_##__name = dlsym(cudnn_dso_handle, #__name);
\
static void* p_##__name = dlsym(cudnn_dso_handle, #__name);
\
return reinterpret_cast<cudnn_func>(p_##__name)(args...); \
} \
}; \
...
...
paddle/fluid/platform/dynload/cupti.h
浏览文件 @
b6c8701e
...
...
@@ -45,7 +45,7 @@ extern void *cupti_dso_handle;
std::call_once(cupti_dso_flag, []() { \
cupti_dso_handle = paddle::platform::dynload::GetCUPTIDsoHandle(); \
}); \
void *p_##__name = dlsym(cupti_dso_handle, #__name);
\
static void *p_##__name = dlsym(cupti_dso_handle, #__name);
\
return reinterpret_cast<cuptiFunc>(p_##__name)(args...); \
} \
}; \
...
...
paddle/fluid/platform/dynload/curand.h
浏览文件 @
b6c8701e
...
...
@@ -34,7 +34,7 @@ extern void *curand_dso_handle;
std::call_once(curand_dso_flag, []() { \
curand_dso_handle = paddle::platform::dynload::GetCurandDsoHandle(); \
}); \
void *p_##__name = dlsym(curand_dso_handle, #__name);
\
static void *p_##__name = dlsym(curand_dso_handle, #__name);
\
return reinterpret_cast<curandFunc>(p_##__name)(args...); \
} \
}; \
...
...
paddle/fluid/platform/dynload/nccl.h
浏览文件 @
b6c8701e
...
...
@@ -37,7 +37,7 @@ extern void* nccl_dso_handle;
std::call_once(nccl_dso_flag, []() { \
nccl_dso_handle = paddle::platform::dynload::GetNCCLDsoHandle(); \
}); \
void* p_##__name = dlsym(nccl_dso_handle, #__name);
\
static void* p_##__name = dlsym(nccl_dso_handle, #__name);
\
return reinterpret_cast<nccl_func>(p_##__name)(args...); \
} \
}; \
...
...
paddle/fluid/platform/dynload/tensorrt.h
浏览文件 @
b6c8701e
...
...
@@ -40,7 +40,7 @@ extern void* tensorrt_dso_handle;
paddle::platform::dynload::GetTensorRtDsoHandle(); \
PADDLE_ENFORCE(tensorrt_dso_handle, "load tensorrt so failed"); \
}); \
void* p_##__name = dlsym(tensorrt_dso_handle, #__name);
\
static void* p_##__name = dlsym(tensorrt_dso_handle, #__name);
\
PADDLE_ENFORCE(p_##__name, "load %s failed", #__name); \
return reinterpret_cast<tensorrt_func>(p_##__name)(args...); \
} \
...
...
paddle/fluid/platform/dynload/warpctc.h
浏览文件 @
b6c8701e
...
...
@@ -40,7 +40,7 @@ extern void* warpctc_dso_handle;
std::call_once(warpctc_dso_flag, []() { \
warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \
}); \
void* p_##_name = dlsym(warpctc_dso_handle, #__name);
\
static void* p_##_name = dlsym(warpctc_dso_handle, #__name);
\
return reinterpret_cast<warpctcFunc>(p_##_name)(args...); \
} \
}; \
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
b6c8701e
...
...
@@ -519,6 +519,14 @@ All parameter, weight, gradient are variables in Paddle.
[](
const
ExecutionStrategy
&
self
)
{
return
self
.
allow_op_delay_
;
},
[](
ExecutionStrategy
&
self
,
bool
allow_op_delay
)
{
self
.
allow_op_delay_
=
allow_op_delay
;
})
.
def_property
(
"num_iteration_per_drop_scope"
,
[](
const
ExecutionStrategy
&
self
)
{
return
self
.
num_iteration_per_drop_scope_
;
},
[](
ExecutionStrategy
&
self
,
size_t
num_iteration_per_drop_scope
)
{
self
.
num_iteration_per_drop_scope_
=
num_iteration_per_drop_scope
;
});
py
::
class_
<
BuildStrategy
>
build_strategy
(
pe
,
"BuildStrategy"
);
...
...
python/paddle/fluid/tests/test_concurrency.py
→
python/paddle/fluid/tests/
no_
test_concurrency.py
浏览文件 @
b6c8701e
文件已移动
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
b6c8701e
...
...
@@ -43,12 +43,10 @@ list(REMOVE_ITEM TEST_OPS test_warpctc_op)
list
(
REMOVE_ITEM TEST_OPS test_dist_train
)
list
(
REMOVE_ITEM TEST_OPS test_parallel_executor_crf
)
list
(
REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed
)
# TODO(wuyi): this test hangs on CI, will add it back later
list
(
REMOVE_ITEM TEST_OPS test_listen_and_serv_op
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
endforeach
(
TEST_OP
)
py_test_modules
(
test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=
${
WARPCTC_LIB_DIR
}
SERIAL
)
py_test_modules
(
test_dist_train MODULES test_dist_train SERIAL
)
# FIXME(Yancey1989): this test takes much longer on CUDAPlace because it
# has to load the cudnn libraries, so we use a longer timeout to keep this
# unit test stable.
set_tests_properties
(
test_listen_and_serv_op PROPERTIES TIMEOUT 30
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录