Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
a97d5a61
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a97d5a61
编写于
5月 13, 2020
作者:
T
tangwei12
提交者:
GitHub
5月 13, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix op error, test=develop (#24451)
上级
7c17ed57
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
30 additions
and
12 deletions
+30
-12
paddle/fluid/operators/distributed_ops/recv_op.cc
paddle/fluid/operators/distributed_ops/recv_op.cc
+3
-1
paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc
...e/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc
+17
-8
paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h
...le/fluid/operators/distributed_ops/ref_by_trainer_id_op.h
+4
-1
paddle/fluid/operators/distributed_ops/send_barrier_op.cc
paddle/fluid/operators/distributed_ops/send_barrier_op.cc
+3
-1
paddle/fluid/operators/distributed_ops/send_op.cc
paddle/fluid/operators/distributed_ops/send_op.cc
+3
-1
未找到文件。
paddle/fluid/operators/distributed_ops/recv_op.cc
浏览文件 @
a97d5a61
...
...
@@ -84,7 +84,9 @@ class RecvOp : public framework::OperatorBase {
}
for
(
size_t
i
=
0
;
i
<
rets
.
size
();
i
++
)
{
VLOG
(
7
)
<<
"before sync_recv "
<<
outs
[
i
]
<<
"from "
<<
epmap
[
i
];
PADDLE_ENFORCE_NE
(
rets
[
i
]
->
Wait
(),
0U
,
"internal error in RPCClient"
);
PADDLE_ENFORCE_NE
(
rets
[
i
]
->
Wait
(),
0U
,
platform
::
errors
::
ExecutionTimeout
(
"internal error in RPCClient"
));
VLOG
(
7
)
<<
"after sync_recv "
<<
outs
[
i
]
<<
"from "
<<
epmap
[
i
];
}
}
...
...
paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc
浏览文件 @
a97d5a61
...
...
@@ -27,14 +27,23 @@ class RefByTrainerIdOp : public framework::OperatorWithKernel {
:
OperatorWithKernel
(
type
,
inputs
,
outputs
,
attrs
)
{}
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInputs
(
"X"
),
"Input(X) of RefByTrainerIdOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"TrainerId"
),
"Input(TrainerId) of RefByTrainerIdOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of RefByTrainerIdOp should not be null."
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"TrainerId"
).
size
(),
1
,
"TrainerId should be a scalar."
);
PADDLE_ENFORCE_EQ
(
ctx
->
HasInputs
(
"X"
),
true
,
platform
::
errors
::
InvalidArgument
(
"Input(X) of RefByTrainerIdOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"TrainerId"
),
true
,
platform
::
errors
::
InvalidArgument
(
"Input(TrainerId) of RefByTrainerIdOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"Out"
),
true
,
platform
::
errors
::
InvalidArgument
(
"Output(Out) of RefByTrainerIdOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"TrainerId"
).
size
(),
1
,
platform
::
errors
::
InvalidArgument
(
"TrainerId should be a scalar."
));
// Out's shape is determined at runtime.
}
...
...
paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h
浏览文件 @
a97d5a61
...
...
@@ -38,7 +38,10 @@ class RefByTrainerIdKernel : public framework::OpKernel<T> {
}
else
{
trainer_id
=
*
trainer_id_data
;
}
PADDLE_ENFORCE_LT
((
size_t
)
trainer_id
,
in_list
.
size
());
PADDLE_ENFORCE_LT
((
size_t
)
trainer_id
,
in_list
.
size
(),
platform
::
errors
::
InvalidArgument
(
"X' size must >= TrainerId: [%s], but received [%s]"
,
trainer_id
,
in_list
.
size
()));
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
framework
::
TensorCopy
(
*
(
in_list
[
trainer_id
]),
in_list
[
trainer_id
]
->
place
(),
out
);
...
...
paddle/fluid/operators/distributed_ops/send_barrier_op.cc
浏览文件 @
a97d5a61
...
...
@@ -59,7 +59,9 @@ class SendBarrierOp : public framework::OperatorBase {
}
for
(
size_t
i
=
0
;
i
<
rets
.
size
();
i
++
)
{
PADDLE_ENFORCE_NE
(
rets
[
i
]
->
Wait
(),
0U
,
"internal error in RPCClient"
);
PADDLE_ENFORCE_NE
(
rets
[
i
]
->
Wait
(),
0U
,
platform
::
errors
::
ExecutionTimeout
(
"internal error in RPCClient"
));
}
}
};
...
...
paddle/fluid/operators/distributed_ops/send_op.cc
浏览文件 @
a97d5a61
...
...
@@ -83,7 +83,9 @@ class SendOp : public framework::OperatorBase {
}
for
(
size_t
i
=
0
;
i
<
rets
.
size
();
i
++
)
{
VLOG
(
7
)
<<
"before sync_send "
<<
ins
[
i
]
<<
"from "
<<
epmap
[
i
];
PADDLE_ENFORCE_NE
(
rets
[
i
]
->
Wait
(),
0U
,
"internal error in RPCClient"
);
PADDLE_ENFORCE_NE
(
rets
[
i
]
->
Wait
(),
0U
,
platform
::
errors
::
ExecutionTimeout
(
"internal error in RPCClient"
));
VLOG
(
7
)
<<
"after sync_send "
<<
ins
[
i
]
<<
"from "
<<
epmap
[
i
];
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录