Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
fd144954
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
fd144954
编写于
12月 17, 2018
作者:
Y
Yancey1989
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
redefine api test=develop
上级
4a4ccac1
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
101 addition
and
93 deletion
+101
-93
paddle/fluid/API.spec
paddle/fluid/API.spec
+0
-1
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+3
-1
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+2
-0
paddle/fluid/framework/details/execution_strategy.h
paddle/fluid/framework/details/execution_strategy.h
+1
-1
paddle/fluid/framework/details/parallel_ssa_graph_executor.cc
...le/fluid/framework/details/parallel_ssa_graph_executor.cc
+0
-1
paddle/fluid/framework/ir/node.h
paddle/fluid/framework/ir/node.h
+0
-1
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+15
-14
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+19
-24
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
...ddle/fluid/tests/unittests/parallel_executor_test_base.py
+20
-21
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
...dle/fluid/tests/unittests/test_parallel_executor_mnist.py
+33
-23
python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py
...fluid/tests/unittests/test_parallel_executor_seresnext.py
+5
-5
python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py
...uid/tests/unittests/test_parallel_executor_transformer.py
+3
-1
未找到文件。
paddle/fluid/API.spec
浏览文件 @
fd144954
...
...
@@ -26,7 +26,6 @@ paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], vara
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.ExecutionStrategy.ExecutorType.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.ExecutionStrategy.ExecutorType, arg0: int) -> None
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.ExecutionStrategy) -> None
paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.GradientScaleStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.ReduceStrategy, arg0: int) -> None
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
fd144954
...
...
@@ -26,7 +26,9 @@ namespace framework {
namespace
details
{
static
inline
bool
SeqOnlyAllReduceOps
(
const
BuildStrategy
&
strategy
)
{
return
(
!
strategy
.
enable_sequential_execution_
&&
strategy
.
num_trainers_
>
1
);
return
(
!
strategy
.
enable_sequential_execution_
&&
strategy
.
num_trainers_
>
1
)
||
strategy
.
enable_parallel_graph_
;
}
class
ParallelExecutorPassBuilder
:
public
ir
::
PassBuilder
{
...
...
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
fd144954
...
...
@@ -73,6 +73,8 @@ struct BuildStrategy {
bool
fuse_broadcast_op_
{
false
};
bool
enable_parallel_graph_
{
false
};
int
num_trainers_
{
1
};
int
trainer_id_
{
0
};
std
::
vector
<
std
::
string
>
trainers_endpoints_
;
...
...
paddle/fluid/framework/details/execution_strategy.h
浏览文件 @
fd144954
...
...
@@ -20,7 +20,7 @@ namespace framework {
namespace
details
{
struct
ExecutionStrategy
{
enum
ExecutorType
{
kDefault
=
0
,
kExperimental
=
1
,
kParallelGraph
=
2
};
enum
ExecutorType
{
kDefault
=
0
,
kExperimental
=
1
};
size_t
num_threads_
{
0
};
bool
use_cuda_
{
true
};
...
...
paddle/fluid/framework/details/parallel_ssa_graph_executor.cc
浏览文件 @
fd144954
...
...
@@ -29,7 +29,6 @@ ParallelSSAGraphExecutor::ParallelSSAGraphExecutor(
graphs_
(
std
::
move
(
graphs
))
{
PADDLE_ENFORCE_EQ
(
places_
.
size
(),
local_scopes_
.
size
());
// do not use threadpool for each graph execution.
strategy_
.
num_threads_
=
1UL
;
for
(
size_t
i
=
0
;
i
<
places
.
size
();
++
i
)
{
executors_
.
emplace_back
(
new
details
::
ThreadedSSAGraphExecutor
(
strategy_
,
{
local_scopes_
[
i
]},
{
places_
[
i
]},
std
::
move
(
graphs_
[
i
])));
...
...
paddle/fluid/framework/ir/node.h
浏览文件 @
fd144954
...
...
@@ -49,7 +49,6 @@ class Node {
public:
virtual
~
Node
()
{
if
(
!
wrapper_
.
empty
())
{
VLOG
(
4
)
<<
"ir::Node deleting a wrapper node "
<<
Name
();
wrapper_deleter_
();
}
}
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
fd144954
...
...
@@ -199,7 +199,7 @@ ParallelExecutor::ParallelExecutor(
"the number of places must be greater than 1."
);
}
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kParallelGraph
)
{
if
(
build_strategy
.
enable_parallel_graph_
)
{
PADDLE_ENFORCE
(
member_
->
use_all_reduce_
,
"build_strategy.reduce should be `AllReduce` if you want to use"
...
...
@@ -231,7 +231,7 @@ ParallelExecutor::ParallelExecutor(
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto
*
nccl_id_var
=
scope
->
FindVar
(
NCCL_ID_VARNAME
);
ncclUniqueId
*
nccl_id
=
nullptr
;
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kParallelGraph
)
{
if
(
build_strategy
.
enable_parallel_graph_
)
{
// parallel graph mode should initialize nccl by ncclCommInitRank since
// it call nccl operator per device per thread.
if
(
nccl_id_var
==
nullptr
)
{
...
...
@@ -265,7 +265,7 @@ ParallelExecutor::ParallelExecutor(
// ncclOp
std
::
vector
<
std
::
unique_ptr
<
ir
::
Graph
>>
graphs
;
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kParallelGraph
)
{
if
(
build_strategy
.
enable_parallel_graph_
)
{
for
(
size_t
i
=
0
;
i
<
member_
->
places_
.
size
();
++
i
)
{
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
main_program
,
{
member_
->
places_
[
i
]},
loss_var_name
,
params
,
...
...
@@ -287,9 +287,8 @@ ParallelExecutor::ParallelExecutor(
#endif
auto
max_memory_size
=
GetEagerDeletionThreshold
();
// TODO(Yancey1989): fix gc failed on ParallelGraph executor.
if
(
max_memory_size
>=
0
&&
exec_strategy
.
type_
!=
ExecutionStrategy
::
kParallelGraph
)
{
// TODO(Yancey1989): fix gc failed on ParallelGraph strategy.
if
(
max_memory_size
>=
0
&&
!
build_strategy
.
enable_parallel_graph_
)
{
graphs
[
0
]
=
member_
->
PrepareGCAndRefCnts
(
std
::
move
(
graphs
[
0
]),
static_cast
<
size_t
>
(
max_memory_size
));
}
...
...
@@ -323,18 +322,20 @@ ParallelExecutor::ParallelExecutor(
}
}
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kDefault
)
{
member_
->
executor_
.
reset
(
new
details
::
ThreadedSSAGraphExecutor
(
exec_strategy
,
member_
->
local_scopes_
,
member_
->
places_
,
std
::
move
(
graphs
[
0
])));
}
else
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kParallelGraph
)
{
if
(
build_strategy
.
enable_parallel_graph_
)
{
member_
->
executor_
.
reset
(
new
details
::
ParallelSSAGraphExecutor
(
exec_strategy
,
member_
->
local_scopes_
,
member_
->
places_
,
std
::
move
(
graphs
)));
}
else
{
member_
->
executor_
.
reset
(
new
details
::
FastThreadedSSAGraphExecutor
(
exec_strategy
,
member_
->
local_scopes_
,
member_
->
places_
,
std
::
move
(
graphs
[
0
])));
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kDefault
)
{
member_
->
executor_
.
reset
(
new
details
::
ThreadedSSAGraphExecutor
(
exec_strategy
,
member_
->
local_scopes_
,
member_
->
places_
,
std
::
move
(
graphs
[
0
])));
}
else
{
member_
->
executor_
.
reset
(
new
details
::
FastThreadedSSAGraphExecutor
(
exec_strategy
,
member_
->
local_scopes_
,
member_
->
places_
,
std
::
move
(
graphs
[
0
])));
}
}
member_
->
executor_
.
reset
(
new
details
::
ScopeBufferedSSAGraphExecutor
(
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
fd144954
...
...
@@ -761,11 +761,6 @@ All parameter, weight, gradient are variables in Paddle.
)DOC"
);
py
::
enum_
<
ExecutionStrategy
::
ExecutorType
>
(
exec_strategy
,
"ExecutorType"
)
.
value
(
"Default"
,
ExecutionStrategy
::
ExecutorType
::
kDefault
)
.
value
(
"Experimental"
,
ExecutionStrategy
::
ExecutorType
::
kExperimental
)
.
value
(
"ParallelGraph"
,
ExecutionStrategy
::
ExecutorType
::
kParallelGraph
);
exec_strategy
.
def
(
py
::
init
())
.
def_property
(
"num_threads"
,
...
...
@@ -823,25 +818,17 @@ All parameter, weight, gradient are variables in Paddle.
[](
const
ExecutionStrategy
&
self
)
{
return
self
.
dry_run_
;
},
[](
ExecutionStrategy
&
self
,
bool
dry_run
)
{
self
.
dry_run_
=
dry_run
;
})
.
def_property
(
"executor_type"
,
[](
const
ExecutionStrategy
&
self
)
{
return
self
.
type_
;
},
[](
ExecutionStrategy
&
self
,
ExecutionStrategy
::
ExecutorType
type
)
{
self
.
type_
=
type
;
},
R"DOC(The type is ExecutorType which is the enum ranging from Default,
ParallelGraph and Experiment:
Default: Compile the main_program into a multi-devices graph,
and execute this graph on multi-devices with multiple threads which
specified by build_strategy.num_threads.
ParallelGraph: Compile the main_program into multiple graphs, and execute each of the graphs on one
device with one thread. Please note, this mode only supports all-reduce mode and use_cuda=True.
This approach can achieve better performance in some scenarios.
Experimental: Compile the main_program into a multi-devices graph,
and executor this graph with a faster execution mode than the Default,
this approach is on the experiments.)DOC"
);
});
exec_strategy
.
def_property
(
"use_experimental_executor"
,
[](
const
ExecutionStrategy
&
self
)
{
return
self
.
type_
==
ExecutionStrategy
::
kExperimental
;
},
[](
ExecutionStrategy
&
self
,
bool
experimental
)
{
self
.
type_
=
experimental
?
ExecutionStrategy
::
kExperimental
:
ExecutionStrategy
::
kDefault
;
});
py
::
class_
<
BuildStrategy
>
build_strategy
(
pe
,
"BuildStrategy"
,
R"DOC(
BuildStrategy allows the user to more preciously control how to
...
...
@@ -964,6 +951,14 @@ Experimental: Compile the main_program into a multi-devices graph,
R"DOC(The type is BOOL, fuse_elewise_add_act_ops indicate whether
to fuse elementwise_add_op and activation_op,
it may make the execution faster. Default False)DOC"
)
.
def_property
(
"enable_parallel_graph"
,
[](
const
BuildStrategy
&
self
)
{
return
self
.
enable_parallel_graph_
;
},
[](
BuildStrategy
&
self
,
bool
b
)
{
self
.
enable_parallel_graph_
=
b
;
},
R"DOC(The type is BOOL, if set True, ParallelExecutor would build the main_program into multiple graphs,
each of the graphs would run with one device. This approach can achieve better performance in
some scenarios. Please note, this approach only supports all-reduce mode
on GPU device)DOC"
)
.
def
(
"_finalize_strategy_and_create_passes"
,
[](
BuildStrategy
&
self
)
->
std
::
shared_ptr
<
ir
::
PassBuilder
>
{
return
self
.
CreatePassesFromStrategy
(
true
);
...
...
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
浏览文件 @
fd144954
...
...
@@ -26,26 +26,24 @@ import sys
__all__
=
[
'TestParallelExecutorBase'
]
ExecutorType
=
fluid
.
ExecutionStrategy
().
ExecutorType
class
TestParallelExecutorBase
(
unittest
.
TestCase
):
def
check_network_convergence
(
self
,
method
,
use_cuda
=
True
,
memory_opt
=
True
,
iter
=
50
,
batch_size
=
Non
e
,
allow_op_delay
=
Fals
e
,
feed_dict
=
None
,
seed
=
Non
e
,
use_parallel_executor
=
Tru
e
,
use_reduce
=
False
,
fuse_elewise_add_act_ops
=
False
,
optimizer
=
fluid
.
optimizer
.
Adam
,
exec_type
=
fluid
.
ExecutionStrategy
().
ExecutorType
.
Default
,
enable_sequential_execution
=
False
):
def
check_network_convergence
(
self
,
method
,
use_cuda
=
True
,
memory_opt
=
True
,
iter
=
50
,
batch_size
=
None
,
allow_op_delay
=
Fals
e
,
feed_dict
=
Non
e
,
seed
=
None
,
use_parallel_executor
=
Tru
e
,
use_reduce
=
Fals
e
,
use_parallel_graph
=
False
,
fuse_elewise_add_act_ops
=
False
,
optimizer
=
fluid
.
optimizer
.
Adam
,
use_fast_executor
=
False
,
enable_sequential_execution
=
False
):
def
run_executor
(
exe
,
feed
,
fetch_list
,
program
=
None
):
if
isinstance
(
exe
,
fluid
.
ParallelExecutor
):
res
=
exe
.
run
(
fetch_list
=
fetch_list
,
feed
=
feed
)
...
...
@@ -61,8 +59,8 @@ class TestParallelExecutorBase(unittest.TestCase):
startup
=
fluid
.
Program
()
startup
.
random_seed
=
1
# Fix random seed
main
.
random_seed
=
1
scope
=
fluid
.
Scope
()
with
fluid
.
scope_guard
(
scope
):
s
elf
.
s
cope
=
fluid
.
Scope
()
with
fluid
.
scope_guard
(
s
elf
.
s
cope
):
with
fluid
.
program_guard
(
main
,
startup
):
if
seed
is
not
None
:
startup
.
random_seed
=
seed
...
...
@@ -80,13 +78,14 @@ class TestParallelExecutorBase(unittest.TestCase):
startup_exe
.
run
(
startup
)
exec_strategy
=
fluid
.
ExecutionStrategy
()
exec_strategy
.
allow_op_delay
=
allow_op_delay
exec_strategy
.
executor_type
=
exec_type
exec_strategy
.
use_experimental_executor
=
use_fast_executor
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
Reduce
\
if
use_reduce
else
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
build_strategy
.
fuse_elewise_add_act_ops
=
fuse_elewise_add_act_ops
build_strategy
.
enable_sequential_execution
=
enable_sequential_execution
build_strategy
.
enable_parallel_graph
=
use_parallel_graph
if
use_cuda
and
core
.
is_compiled_with_cuda
():
build_strategy
.
remove_unnecessary_lock
=
True
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
浏览文件 @
fd144954
...
...
@@ -20,7 +20,7 @@ import numpy as np
import
paddle.fluid.core
as
core
import
os
import
paddle.fluid
as
fluid
from
parallel_executor_test_base
import
TestParallelExecutorBase
,
ExecutorType
from
parallel_executor_test_base
import
TestParallelExecutorBase
def
simple_fc_net
(
use_feed
):
...
...
@@ -79,30 +79,32 @@ class TestMNIST(TestParallelExecutorBase):
return
img
,
label
=
self
.
_init_data
()
"""
all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
model,
feed_dict={"image": img,
"label": label},
use_cuda=use_cuda,
use_reduce=False)
"""
reduce_first_loss
,
reduce_last_loss
=
self
.
check_network_convergence
(
model
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
use_reduce
=
True
)
"""
for loss in zip(all_reduce_first_loss, reduce_first_loss):
self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
for loss in zip(all_reduce_last_loss, reduce_last_loss):
self.assertAlmostEqual(loss[0], loss[1], delta=1e-4)
"""
# simple_fc
def
check_simple_fc_convergence
(
self
,
use_cuda
,
use_reduce
=
False
,
exec_type
=
ExecutorType
.
Default
):
use_parallel_graph
=
False
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
return
...
...
@@ -114,20 +116,24 @@ class TestMNIST(TestParallelExecutorBase):
"label"
:
label
},
use_cuda
=
use_cuda
,
use_reduce
=
use_reduce
,
exec_type
=
exec_type
)
use_parallel_graph
=
use_parallel_graph
)
def
test_simple_fc
(
self
):
def
no
test_simple_fc
(
self
):
# use_cuda
self
.
check_simple_fc_convergence
(
True
,
ExecutorType
.
Default
)
self
.
check_simple_fc_convergence
(
True
,
ExecutorType
.
ParallelGraph
)
if
core
.
is_compiled_with_cuda
():
self
.
check_simple_fc_convergence
(
True
)
self
.
check_simple_fc_convergence
(
True
,
use_reduce
=
False
,
use_parallel_graph
=
True
)
self
.
check_simple_fc_convergence
(
False
)
def
test_simple_fc_with_new_strategy
(
self
):
def
no
test_simple_fc_with_new_strategy
(
self
):
# use_cuda, use_reduce
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
True
)
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
False
)
def
check_simple_fc_parallel_accuracy
(
self
,
use_cuda
,
exec_type
):
def
check_simple_fc_parallel_accuracy
(
self
,
use_cuda
,
use_parallel_graph
=
False
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
return
...
...
@@ -140,7 +146,7 @@ class TestMNIST(TestParallelExecutorBase):
"label"
:
label
},
use_cuda
=
use_cuda
,
use_parallel_executor
=
False
,
exec_type
=
exec_type
)
use_parallel_graph
=
use_parallel_graph
)
parallel_first_loss
,
parallel_last_loss
=
self
.
check_network_convergence
(
method
=
simple_fc_net
,
seed
=
1
,
...
...
@@ -148,7 +154,7 @@ class TestMNIST(TestParallelExecutorBase):
"label"
:
label
},
use_cuda
=
use_cuda
,
use_parallel_executor
=
True
,
exec_type
=
exec_type
)
use_parallel_graph
=
use_parallel_graph
)
self
.
assertAlmostEquals
(
np
.
mean
(
parallel_first_loss
),
...
...
@@ -157,17 +163,20 @@ class TestMNIST(TestParallelExecutorBase):
self
.
assertAlmostEquals
(
np
.
mean
(
parallel_last_loss
),
single_last_loss
,
delta
=
1e-6
)
def
test_simple_fc_parallel_accuracy
(
self
):
self
.
check_simple_fc_parallel_accuracy
(
True
,
ExecutorType
.
Default
)
self
.
check_simple_fc_parallel_accuracy
(
True
,
ExecutorType
.
ParallelGraph
)
def
notest_simple_fc_parallel_accuracy
(
self
):
if
core
.
is_compiled_with_cuda
():
self
.
check_simple_fc_parallel_accuracy
(
True
)
self
.
check_simple_fc_parallel_accuracy
(
True
,
use_parallel_graph
=
True
)
# FIXME(Yancey1989): ParallelGraph executor type support CPU mode
self
.
check_simple_fc_parallel_accuracy
(
False
,
ExecutorType
.
Default
)
self
.
check_simple_fc_parallel_accuracy
(
False
)
def
check_batchnorm_fc_convergence
(
self
,
use_cuda
,
exec_type
):
def
check_batchnorm_fc_convergence
(
self
,
use_cuda
,
use_fast_executor
,
use_parallel_graph
=
False
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
return
if
not
use_cuda
and
exec_type
==
ExecutorType
.
ParallelGraph
:
return
img
,
label
=
self
.
_init_data
()
...
...
@@ -176,13 +185,14 @@ class TestMNIST(TestParallelExecutorBase):
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
exec_type
=
exec_type
)
use_fast_executor
=
use_fast_executor
,
use_parallel_graph
=
use_parallel_graph
)
def
test_batchnorm_fc
(
self
):
for
use_cuda
in
(
False
,
True
):
for
exec_type
in
(
ExecutorType
.
Default
,
ExecutorType
.
Experimental
,
ExecutorType
.
ParallelGraph
):
self
.
check_batchnorm_fc_convergence
(
use_cuda
,
exec_typ
e
)
for
use_fast_executor
in
(
False
,
True
):
self
.
check_batchnorm_fc_convergence
(
use_cuda
,
use_fast_executor
)
self
.
check_batchnorm_fc_convergence
(
use_cuda
,
False
,
Tru
e
)
def
test_batchnorm_fc_with_new_strategy
(
self
):
# FIXME(zcd): close this test temporally.
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py
浏览文件 @
fd144954
...
...
@@ -19,7 +19,7 @@ import paddle.fluid.layers.ops as ops
from
paddle.fluid.initializer
import
init_on_cpu
from
paddle.fluid.layers.learning_rate_scheduler
import
_decay_step_counter
import
paddle.fluid.core
as
core
from
parallel_executor_test_base
import
TestParallelExecutorBase
,
ExecutorType
from
parallel_executor_test_base
import
TestParallelExecutorBase
import
unittest
import
math
import
os
...
...
@@ -282,7 +282,7 @@ class TestResnet(TestParallelExecutorBase):
use_reduce
=
False
,
iter
=
20
,
delta2
=
1e-6
,
exec_type
=
ExecutorType
.
Default
,
use_parallel_graph
=
False
,
lr_scale
=
1.0
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
return
...
...
@@ -303,7 +303,7 @@ class TestResnet(TestParallelExecutorBase):
use_reduce
=
use_reduce
,
optimizer
=
optimizer
(),
use_parallel_executor
=
False
,
exec_type
=
exec_type
)
use_parallel_graph
=
use_parallel_graph
)
parallel_first_loss
,
parallel_last_loss
=
self
.
check_network_convergence
(
model
,
feed_dict
=
{
"image"
:
img
,
...
...
@@ -313,7 +313,7 @@ class TestResnet(TestParallelExecutorBase):
use_cuda
=
use_cuda
,
use_reduce
=
use_reduce
,
optimizer
=
optimizer
(
lr_scale
=
lr_scale
),
exec_type
=
exec_type
)
use_parallel_graph
=
use_parallel_graph
)
self
.
assertAlmostEquals
(
np
.
mean
(
parallel_first_loss
),
single_first_loss
[
0
],
delta
=
1e-6
)
...
...
@@ -327,7 +327,7 @@ class TestResnet(TestParallelExecutorBase):
self
.
_check_resnet_convergence
(
model
=
SE_ResNeXt50Small
,
use_cuda
=
True
,
exec_type
=
ExecutorType
.
ParallelGraph
,
use_parallel_graph
=
True
,
lr_scale
=
core
.
get_cuda_device_count
())
self
.
_check_resnet_convergence
(
model
=
SE_ResNeXt50Small
,
use_cuda
=
False
,
iter
=
2
,
delta2
=
1e-3
)
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py
浏览文件 @
fd144954
...
...
@@ -17,7 +17,7 @@ from __future__ import print_function
import
paddle.fluid
as
fluid
import
transformer_model
import
numpy
as
np
from
parallel_executor_test_base
import
TestParallelExecutorBase
,
ExecutorType
from
parallel_executor_test_base
import
TestParallelExecutorBase
import
unittest
import
paddle
import
paddle.fluid.core
as
core
...
...
@@ -175,6 +175,8 @@ class TestTransformer(TestParallelExecutorBase):
self
.
check_network_convergence
(
transformer
,
use_cuda
=
True
)
self
.
check_network_convergence
(
transformer
,
use_cuda
=
True
,
enable_sequential_execution
=
True
)
self
.
check_network_convergence
(
transformer
,
use_cuda
=
True
,
use_parallel_graph
=
True
)
self
.
check_network_convergence
(
transformer
,
use_cuda
=
False
,
iter
=
5
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录