Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
aff8a26d
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
aff8a26d
编写于
5月 07, 2018
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
check generated_op_
上级
2e5d44f1
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
26 addition
and
30 deletion
+26
-30
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
+1
-1
paddle/fluid/framework/details/send_op_handle.cc
paddle/fluid/framework/details/send_op_handle.cc
+1
-1
python/paddle/fluid/tests/unittests/test_parallel_executor.py
...on/paddle/fluid/tests/unittests/test_parallel_executor.py
+24
-28
未找到文件。
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
浏览文件 @
aff8a26d
...
...
@@ -36,7 +36,7 @@ void NCCLAllReduceOpHandle::RunImpl() {
// Wait input done
for
(
auto
*
in
:
inputs_
)
{
auto
&
p
=
static_cast
<
VarHandle
*>
(
in
)
->
place_
;
in
->
generated_op_
->
Wait
(
dev_ctxes_
[
p
]);
if
(
in
->
generated_op_
)
in
->
generated_op_
->
Wait
(
dev_ctxes_
[
p
]);
}
auto
&
var_name
=
static_cast
<
VarHandle
*>
(
this
->
inputs_
[
0
])
->
name_
;
...
...
paddle/fluid/framework/details/send_op_handle.cc
浏览文件 @
aff8a26d
...
...
@@ -32,7 +32,7 @@ void SendOpHandle::RunImpl() {
if
(
in
->
DebugString
()
==
"dummy"
)
{
// HACK
continue
;
}
in
->
generated_op_
->
Wait
(
dev_ctxes_
[
p
]);
if
(
in
->
generated_op_
)
in
->
generated_op_
->
Wait
(
dev_ctxes_
[
p
]);
}
auto
&
tmp_scope
=
local_scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
// FIXME(wuyi): can not use RunAndRecordEvent here, for it will cause dead
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor.py
浏览文件 @
aff8a26d
...
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
import
numpy
as
np
import
unittest
import
paddle.fluid
as
fluid
...
...
@@ -243,7 +243,7 @@ class TestParallelExecutorBase(unittest.TestCase):
begin
=
time
.
time
()
first_loss
,
=
run_executor
(
exe
=
exe
,
feed
=
feed_dict
,
fetch_list
=
[
loss
.
name
])
first_loss
=
numpy
.
array
(
first_loss
)
first_loss
=
np
.
array
(
first_loss
)
for
i
in
xrange
(
iter
):
run_executor
(
exe
=
exe
,
feed
=
feed_dict
,
fetch_list
=
[])
...
...
@@ -256,7 +256,7 @@ class TestParallelExecutorBase(unittest.TestCase):
print
"%.4f Instance per second"
%
(
(
batch_size
*
iter
+
2
)
/
(
end
-
begin
))
last_loss
=
numpy
.
array
(
last_loss
)
last_loss
=
np
.
array
(
last_loss
)
print
first_loss
,
last_loss
# self.assertGreater(first_loss[0], last_loss[0])
...
...
@@ -284,8 +284,8 @@ class TestMNIST(TestParallelExecutorBase):
self
.
check_network_convergence
(
simple_fc_net
)
self
.
check_network_convergence
(
simple_fc_net
,
allow_op_delay
=
True
)
img
=
numpy
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
numpy
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
img
=
np
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
np
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
self
.
check_network_convergence
(
simple_fc_net
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
})
...
...
@@ -294,8 +294,8 @@ class TestMNIST(TestParallelExecutorBase):
self
.
check_simple_fc_convergence
()
def
check_simple_fc_parallel_accuracy
(
self
):
img
=
numpy
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
numpy
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
img
=
np
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
np
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
single_first_loss
,
single_last_loss
=
self
.
check_network_convergence
(
method
=
simple_fc_net
,
seed
=
1000
,
...
...
@@ -319,8 +319,8 @@ class TestMNIST(TestParallelExecutorBase):
def
check_batchnorm_fc_convergence
(
self
):
self
.
check_network_convergence
(
fc_with_batchnorm
)
img
=
numpy
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
numpy
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
img
=
np
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
np
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
self
.
check_network_convergence
(
fc_with_batchnorm
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
})
...
...
@@ -404,9 +404,6 @@ class ModelHyperParams(object):
dropout
=
0.1
import
numpy
as
np
def
prepare_batch_input
(
insts
,
src_pad_idx
,
trg_pad_idx
,
n_head
):
"""
Pad the instances to the max sequence length in batch, and generate the
...
...
@@ -533,9 +530,8 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
opt
.
minimize
(
loss
)
batch_size
=
32
image
=
numpy
.
random
.
normal
(
size
=
(
batch_size
,
784
)).
astype
(
'float32'
)
label
=
numpy
.
random
.
randint
(
0
,
10
,
(
batch_size
,
1
),
dtype
=
"int64"
)
image
=
np
.
random
.
normal
(
size
=
(
batch_size
,
784
)).
astype
(
'float32'
)
label
=
np
.
random
.
randint
(
0
,
10
,
(
batch_size
,
1
),
dtype
=
"int64"
)
place
=
fluid
.
CUDAPlace
(
0
)
exe
=
fluid
.
Executor
(
place
)
...
...
@@ -552,12 +548,12 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
for
i
in
xrange
(
5
):
test_loss
,
=
test_exe
.
run
([
loss
.
name
],
feed
=
feed_dict
)
test_loss
=
numpy
.
array
(
test_loss
)
test_loss
=
np
.
array
(
test_loss
)
train_loss
,
=
train_exe
.
run
([
loss
.
name
],
feed
=
feed_dict
)
train_loss
=
numpy
.
array
(
train_loss
)
train_loss
=
np
.
array
(
train_loss
)
self
.
assertTrue
(
numpy
.
allclose
(
np
.
allclose
(
train_loss
,
test_loss
,
atol
=
1e-8
),
"Train loss: "
+
str
(
train_loss
)
+
"
\n
Test loss:"
+
str
(
test_loss
))
...
...
@@ -712,7 +708,7 @@ class TestCRFModel(unittest.TestCase):
data
=
train_data
()
for
i
in
xrange
(
10
):
cur_batch
=
next
(
data
)
print
map
(
numpy
.
array
,
print
map
(
np
.
array
,
pe
.
run
(
feed
=
feeder
.
feed
(
cur_batch
),
fetch_list
=
[
avg_cost
.
name
]))[
0
]
...
...
@@ -723,7 +719,7 @@ class TestCRFModel(unittest.TestCase):
self
.
check_network_convergence
(
is_sparse
=
False
)
# test fetch op
# test fetch all the variables of global_block
import
paddle.dataset.flowers
as
flowers
...
...
@@ -763,7 +759,8 @@ class TestFetchOp(unittest.TestCase):
opt
.
minimize
(
loss
)
# TODO(zcd): I found that once the memory optimizer is open,
# parallel_exe doesn't fetch some variable, such as conv2d_0.b_0@GRAD, conv2d_1.b_0@GRAD.
# parallel_exe doesn't fetch some variable, such as conv2d_0.b_0@GRAD,
# conv2d_1.b_0@GRAD. Those variables should not be pruned.
# fluid.memory_optimize(main)
place
=
fluid
.
CUDAPlace
(
0
)
...
...
@@ -775,16 +772,15 @@ class TestFetchOp(unittest.TestCase):
use_cuda
=
True
,
loss_name
=
loss
.
name
,
main_program
=
main
)
fetch_list
=
[]
for
data
in
train_inputs
:
all_vars
=
main
.
global_block
().
vars
for
k
,
v
in
all_vars
.
iteritems
():
if
v
.
persistable
and
'velocity'
not
in
k
:
if
'velocity'
not
in
k
:
fetch_list
.
append
(
k
)
for
data
in
train_inputs
:
ret
=
pe
.
run
(
fetch_list
,
feed
=
feeder
.
feed
(
data
))
result
=
{}
for
i
in
range
(
len
(
fetch_list
)):
result
[
fetch_list
[
i
]]
=
np
.
sum
(
ret
[
i
]
)
print
(
"%s - %s"
%
(
fetch_list
[
i
],
np
.
sum
(
ret
[
i
]))
)
def
test_update_sparse_parameter
(
self
):
tst_reader
=
paddle
.
batch
(
flowers
.
test
(
use_xmap
=
False
),
batch_size
=
16
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录