Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
a57e8a43
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a57e8a43
编写于
6月 11, 2018
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cpu test
上级
1e731f59
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
20 addition
and
14 deletion
+20
-14
paddle/fluid/framework/details/all_reduce_op_handle.cc
paddle/fluid/framework/details/all_reduce_op_handle.cc
+1
-1
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+3
-4
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
...ddle/fluid/tests/unittests/parallel_executor_test_base.py
+5
-1
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
...dle/fluid/tests/unittests/test_parallel_executor_mnist.py
+5
-3
python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py
...fluid/tests/unittests/test_parallel_executor_seresnext.py
+6
-5
未找到文件。
paddle/fluid/framework/details/all_reduce_op_handle.cc
浏览文件 @
a57e8a43
...
...
@@ -67,7 +67,7 @@ void AllReduceOpHandle::RunImpl() {
if
(
platform
::
is_gpu_place
(
lod_tensors
[
0
]
->
place
()))
{
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE
(
nccl_ctxs_
);
PADDLE_ENFORCE
(
nccl_ctxs_
,
"nccl_ctxs should not be nullptr."
);
int
dtype
=
-
1
;
size_t
numel
=
0
;
std
::
vector
<
std
::
function
<
void
()
>>
all_reduce_calls
;
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
a57e8a43
...
...
@@ -119,11 +119,10 @@ class ParallelExecutor(object):
if
use_cuda
:
# Experiments on se-resnext shows that too many threads hurt
# performance. Worth tunning for other models in the future.
exec_strategy
.
num_threads
=
len
(
self
.
_places
)
*
2
exec_strategy
.
num_threads
=
len
(
self
.
_places
)
*
4
else
:
cpu_num
=
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
exec_strategy
.
num_threads
=
min
(
len
(
self
.
_places
)
*
2
,
cpu_num
)
# Currently num_threads must be 1.
exec_strategy
.
num_threads
=
1
if
build_strategy
is
None
:
build_strategy
=
BuildStrategy
()
...
...
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
浏览文件 @
a57e8a43
...
...
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
multiprocessing
import
os
import
unittest
import
paddle.fluid
as
fluid
import
time
...
...
@@ -73,7 +75,9 @@ class TestParallelExecutorBase(unittest.TestCase):
exe
=
fluid
.
Executor
(
place
=
place
)
if
batch_size
is
not
None
:
batch_size
*=
fluid
.
core
.
get_cuda_device_count
()
batch_size
*=
fluid
.
core
.
get_cuda_device_count
(
)
if
use_cuda
else
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
begin
=
time
.
time
()
first_loss
,
=
run_executor
(
exe
=
exe
,
feed
=
feed_dict
,
fetch_list
=
[
loss
.
name
])
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
浏览文件 @
a57e8a43
...
...
@@ -104,8 +104,9 @@ class TestMNIST(TestParallelExecutorBase):
def
check_simple_fc_convergence
(
self
,
balance_parameter_opt_between_cards
,
use_cuda
=
True
):
self
.
check_network_convergence
(
simple_fc_net
)
self
.
check_network_convergence
(
simple_fc_net
,
allow_op_delay
=
True
)
self
.
check_network_convergence
(
simple_fc_net
,
use_cuda
=
use_cuda
)
self
.
check_network_convergence
(
simple_fc_net
,
use_cuda
=
use_cuda
,
allow_op_delay
=
True
)
img
=
np
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
np
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
...
...
@@ -142,6 +143,7 @@ class TestMNIST(TestParallelExecutorBase):
seed
=
1000
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
use_parallel_executor
=
True
,
balance_parameter_opt_between_cards
=
balance_parameter_opt_between_cards
)
...
...
@@ -161,7 +163,7 @@ class TestMNIST(TestParallelExecutorBase):
def
check_batchnorm_fc_convergence
(
self
,
balance_parameter_opt_between_cards
,
use_cuda
):
self
.
check_network_convergence
(
fc_with_batchnorm
)
self
.
check_network_convergence
(
fc_with_batchnorm
,
use_cuda
=
use_cuda
)
img
=
np
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
np
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
self
.
check_network_convergence
(
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py
浏览文件 @
a57e8a43
...
...
@@ -133,27 +133,28 @@ def SE_ResNeXt50Small(batch_size=2, use_feed=False):
class
TestResnet
(
TestParallelExecutorBase
):
def
check_resnet_convergence
(
self
,
balance_parameter_opt_between_cards
,
use_cuda
=
True
):
use_cuda
=
True
,
iter
=
20
):
import
functools
batch_size
=
2
self
.
check_network_convergence
(
functools
.
partial
(
SE_ResNeXt50Small
,
batch_size
=
batch_size
),
iter
=
20
,
iter
=
iter
,
batch_size
=
batch_size
,
use_cuda
=
use_cuda
,
balance_parameter_opt_between_cards
=
balance_parameter_opt_between_cards
)
def
test_resnet
(
self
):
#
os.environ['CPU_NUM'] = str(4)
os
.
environ
[
'CPU_NUM'
]
=
str
(
4
)
self
.
check_resnet_convergence
(
False
,
use_cuda
=
True
)
# self.check_resnet_convergence(False,use_cuda=False
)
self
.
check_resnet_convergence
(
False
,
use_cuda
=
False
,
iter
=
5
)
def
test_resnet_with_new_strategy
(
self
):
os
.
environ
[
'CPU_NUM'
]
=
str
(
4
)
self
.
check_resnet_convergence
(
True
,
use_cuda
=
True
)
self
.
check_resnet_convergence
(
True
,
use_cuda
=
False
)
self
.
check_resnet_convergence
(
True
,
use_cuda
=
False
,
iter
=
5
)
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录