Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
74259bac
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
74259bac
编写于
6月 24, 2022
作者:
R
ronnywang
提交者:
GitHub
6月 24, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix npu plugin hang in backward in eager mode (#43614)
上级
69e99cc7
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
55 additions
and
9 deletions
+55
-9
paddle/fluid/eager/grad_tensor_holder.cc
paddle/fluid/eager/grad_tensor_holder.cc
+17
-9
paddle/fluid/pybind/eager_method.cc
paddle/fluid/pybind/eager_method.cc
+26
-0
paddle/phi/api/include/tensor.h
paddle/phi/api/include/tensor.h
+8
-0
paddle/phi/api/lib/tensor.cc
paddle/phi/api/lib/tensor.cc
+4
-0
未找到文件。
paddle/fluid/eager/grad_tensor_holder.cc
浏览文件 @
74259bac
...
...
@@ -29,7 +29,9 @@ void GradTensorHolder::SetBufferSlotRankZeros(size_t slot_id, size_t rank) {
}
void
GradTensorHolder
::
CopyValueFromTensor
(
size_t
slot_id
,
size_t
rank
,
const
paddle
::
experimental
::
Tensor
&
t
,
size_t
slot_id
,
size_t
rank
,
const
paddle
::
experimental
::
Tensor
&
t
,
bool
fill_one
)
{
// TODO(jiabin): We need to deal with empty input_buffer with slot size not
// empty;
...
...
@@ -49,7 +51,9 @@ void GradTensorHolder::CopyValueFromTensor(
paddle
::
platform
::
errors
::
Fatal
(
"Invalid rank for GradTensorHolder::add() which exceeds size "
"of buffer slot %d, got slot size is: %d rank is: %d"
,
slot_id
,
buffer_
[
slot_id
].
size
(),
rank
));
slot_id
,
buffer_
[
slot_id
].
size
(),
rank
));
if
(
!
fill_one
)
{
paddle
::
experimental
::
Tensor
&
buffer_tensor
=
buffer_
[
slot_id
][
rank
];
if
((
!
buffer_tensor
.
defined
()
||
!
buffer_tensor
.
initialized
()))
{
...
...
@@ -83,7 +87,8 @@ void GradTensorHolder::CopyValueFromTensor(
}
}
void
GradTensorHolder
::
add
(
size_t
slot_id
,
size_t
rank
,
void
GradTensorHolder
::
add
(
size_t
slot_id
,
size_t
rank
,
const
paddle
::
experimental
::
Tensor
&
t
,
bool
create_graph
)
{
PADDLE_ENFORCE
(
slot_id
<
buffer_
.
size
(),
...
...
@@ -102,7 +107,9 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
paddle
::
platform
::
errors
::
Fatal
(
"Invalid rank for GradTensorHolder::add() which exceeds size "
"of buffer slot %d, got slot size is: %d rank is: %d"
,
slot_id
,
buffer_
[
slot_id
].
size
(),
rank
));
slot_id
,
buffer_
[
slot_id
].
size
(),
rank
));
paddle
::
experimental
::
Tensor
&
buffer_tensor
=
buffer_
[
slot_id
][
rank
];
// TODO(jiabin): Code below is ugly to divide which inner var we used,
...
...
@@ -115,7 +122,8 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
buffer_tensor
=
t
;
}
else
{
// Accumulation
PADDLE_ENFORCE_EQ
(
t
.
initialized
(),
true
,
PADDLE_ENFORCE_EQ
(
t
.
initialized
(),
true
,
paddle
::
platform
::
errors
::
Fatal
(
"We can only accumulate initialized tensor, but we "
"got tensor: %s is empty please check you network "
...
...
@@ -124,7 +132,7 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
if
(
t
.
is_dense_tensor
())
{
if
(
buffer_tensor
.
is_dense_tensor
())
{
if
(
create_graph
)
{
if
(
create_graph
||
t
.
is_custom_device
()
)
{
buffer_tensor
=
add_final_state_dygraph_function
(
t
,
buffer_tensor
);
}
else
{
paddle
::
imperative
::
TensorAdd
<
paddle
::
experimental
::
Tensor
>
(
...
...
@@ -136,8 +144,8 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
// add_dygraph_function once it's supported
paddle
::
experimental
::
Tensor
new_buffer
(
std
::
make_shared
<
phi
::
DenseTensor
>
(),
"tmp_accumulator"
);
paddle
::
imperative
::
SelectedRowsAddTensor
(
buffer_tensor
,
t
,
&
new_buffer
);
paddle
::
imperative
::
SelectedRowsAddTensor
(
buffer_tensor
,
t
,
&
new_buffer
);
buffer_tensor
.
set_impl
(
new_buffer
.
impl
());
}
}
else
if
(
t
.
is_sparse_coo_tensor
())
{
...
...
@@ -151,7 +159,7 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
paddle
::
experimental
::
Tensor
buffer_values
(
std
::
make_shared
<
phi
::
DenseTensor
>
(
buffer_sparse
->
non_zero_elements
()));
if
(
create_graph
)
{
if
(
create_graph
||
t
.
is_custom_device
()
)
{
buffer_values
=
add_final_state_dygraph_function
(
t_values
,
buffer_values
);
}
else
{
...
...
paddle/fluid/pybind/eager_method.cc
浏览文件 @
74259bac
...
...
@@ -292,6 +292,32 @@ static PyObject* tensor_method_numpy(TensorObject* self,
dense_tensor
->
numel
(),
kind
);
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
}
else
if
(
self
->
tensor
.
is_custom_device
())
{
if
(
self
->
tensor
.
is_selected_rows
())
{
VLOG
(
6
)
<<
"Getting SelectedRows's numpy value"
;
auto
*
selected_rows
=
static_cast
<
phi
::
SelectedRows
*>
(
self
->
tensor
.
impl
().
get
());
auto
*
dense_tensor
=
static_cast
<
paddle
::
framework
::
LoDTensor
*>
(
selected_rows
->
mutable_value
());
phi
::
DeviceManager
::
GetDeviceWithPlace
(
self
->
tensor
.
place
())
->
MemoryCopyD2H
(
pybind11
::
detail
::
array_proxy
(
array
)
->
data
,
dense_tensor
->
data
(),
paddle
::
framework
::
DataTypeSize
(
dense_tensor
->
dtype
())
*
dense_tensor
->
numel
());
}
else
{
VLOG
(
6
)
<<
"Getting DenseTensor's numpy value"
;
auto
dense_tensor
=
std
::
dynamic_pointer_cast
<
phi
::
DenseTensor
>
(
self
->
tensor
.
impl
());
phi
::
DeviceManager
::
GetDeviceWithPlace
(
self
->
tensor
.
place
())
->
MemoryCopyD2H
(
pybind11
::
detail
::
array_proxy
(
array
)
->
data
,
dense_tensor
->
data
(),
paddle
::
framework
::
DataTypeSize
(
dense_tensor
->
dtype
())
*
dense_tensor
->
numel
());
}
#endif
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
...
...
paddle/phi/api/include/tensor.h
浏览文件 @
74259bac
...
...
@@ -286,6 +286,14 @@ class PADDLE_API Tensor final {
*/
bool
is_gpu_pinned
()
const
;
/**
* @brief Determine whether the tensor device is CustomDevice
*
* @return true if the tensor's place is a custom-device place
* @return false otherwise
*/
bool
is_custom_device
()
const
;
/* Part 4: Data Access methods */
/**
...
...
paddle/phi/api/lib/tensor.cc
浏览文件 @
74259bac
...
...
@@ -177,6 +177,10 @@ bool Tensor::is_gpu_pinned() const {
return
paddle
::
platform
::
is_cuda_pinned_place
(
place
());
}
// Reports whether this tensor's place is a custom-device place, as decided
// by paddle::platform::is_custom_place.
bool Tensor::is_custom_device() const {
  const auto& tensor_place = place();
  return paddle::platform::is_custom_place(tensor_place);
}
/* Part 4: Data Access methods */
template
<
typename
T
>
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录