Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
bdf5834e
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
bdf5834e
编写于
12月 13, 2021
作者:
T
taixiurong
提交者:
GitHub
12月 13, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update xpu_memcpy (#38049)
上级
9a4eec98
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
47 addition
and
42 deletion
+47
-42
paddle/fluid/framework/details/nan_inf_utils_detail.cc
paddle/fluid/framework/details/nan_inf_utils_detail.cc
+4
-2
paddle/fluid/memory/memcpy.cc
paddle/fluid/memory/memcpy.cc
+12
-2
paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
...le/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
+4
-2
paddle/fluid/operators/amp/update_loss_scaling_op_xpu.cc
paddle/fluid/operators/amp/update_loss_scaling_op_xpu.cc
+14
-8
paddle/fluid/operators/masked_select_op_xpu.cc
paddle/fluid/operators/masked_select_op_xpu.cc
+3
-10
paddle/fluid/operators/range_op_xpu.cc
paddle/fluid/operators/range_op_xpu.cc
+4
-6
paddle/fluid/operators/where_index_op_xpu.cc
paddle/fluid/operators/where_index_op_xpu.cc
+3
-10
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+3
-2
未找到文件。
paddle/fluid/framework/details/nan_inf_utils_detail.cc
浏览文件 @
bdf5834e
...
...
@@ -353,8 +353,10 @@ void CheckVarHasNanOrInf(const std::string& op_type,
}
float
*
cpu_data
=
new
float
[
tensor
->
numel
()];
xpu_memcpy
(
cpu_data
,
tensor
->
data
<
float
>
(),
tensor
->
numel
()
*
sizeof
(
float
),
XPU_DEVICE_TO_HOST
);
memory
::
Copy
(
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
cpu_data
),
BOOST_GET_CONST
(
platform
::
XPUPlace
,
tensor
->
place
()),
static_cast
<
const
void
*>
(
tensor
->
data
<
float
>
()),
tensor
->
numel
()
*
sizeof
(
float
));
bool
flag
=
false
;
for
(
int
i
=
0
;
i
<
tensor
->
numel
();
i
++
)
{
if
(
isnan
(
cpu_data
[
i
])
||
isinf
(
cpu_data
[
i
]))
{
...
...
paddle/fluid/memory/memcpy.cc
浏览文件 @
bdf5834e
...
...
@@ -136,6 +136,11 @@ void Copy<platform::CPUPlace, platform::XPUPlace>(platform::CPUPlace dst_place,
"Baidu Kunlun Card is properly installed."
,
ret
));
}
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
GetByPlace
(
src_place
);
dev_ctx
->
Wait
();
ret
=
xpu_memcpy
(
dst
,
src
,
num
,
XPUMemcpyKind
::
XPU_DEVICE_TO_HOST
);
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
...
...
@@ -182,6 +187,11 @@ void Copy<platform::XPUPlace, platform::XPUPlace>(platform::XPUPlace dst_place,
"Baidu Kunlun Card is properly installed."
,
ret
));
void
*
tmp
=
malloc
(
num
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
GetByPlace
(
src_place
);
dev_ctx
->
Wait
();
ret
=
xpu_memcpy
(
tmp
,
src
,
num
,
XPUMemcpyKind
::
XPU_DEVICE_TO_HOST
);
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
...
...
@@ -214,8 +224,8 @@ void Copy<platform::XPUPlace, platform::XPUPlace>(platform::XPUPlace dst_place,
}
else
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
GetByPlace
(
src_place
);
dev_ctx
->
Wait
();
int
ret
=
xpu
::
memcpy_device
(
dev_ctx
->
x_context
(),
dst
,
src
,
num
);
int
ret
=
xpu
::
copy
(
dev_ctx
->
x_context
(),
static_cast
<
const
int8_t
*>
(
src
),
static_cast
<
int8_t
*>
(
dst
)
,
num
);
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d %s]"
,
ret
,
XPUAPIErrorMsg
[
ret
]));
...
...
paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
浏览文件 @
bdf5834e
...
...
@@ -40,8 +40,10 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
MPDType
cpu_scale_data
;
if
(
platform
::
is_xpu_place
(
scale
->
place
()))
{
xpu_memcpy
(
&
cpu_scale_data
,
scale_data
,
sizeof
(
MPDType
),
XPUMemcpyKind
::
XPU_DEVICE_TO_HOST
);
memory
::
Copy
(
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
&
cpu_scale_data
),
BOOST_GET_CONST
(
platform
::
XPUPlace
,
scale
->
place
()),
static_cast
<
const
void
*>
(
scale_data
),
sizeof
(
MPDType
));
}
else
{
cpu_scale_data
=
(
*
scale_data
);
}
...
...
paddle/fluid/operators/amp/update_loss_scaling_op_xpu.cc
浏览文件 @
bdf5834e
...
...
@@ -42,8 +42,10 @@ class UpdateLossScalingXPUKernel : public framework::OpKernel<T> {
const
bool
*
found_inf_data
=
found_inf
->
data
<
bool
>
();
bool
cpu_found_inf_data
=
false
;
if
(
platform
::
is_xpu_place
(
found_inf
->
place
()))
{
xpu_memcpy
(
&
cpu_found_inf_data
,
found_inf_data
,
sizeof
(
bool
),
XPUMemcpyKind
::
XPU_DEVICE_TO_HOST
);
memory
::
Copy
(
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
&
cpu_found_inf_data
),
BOOST_GET_CONST
(
platform
::
XPUPlace
,
found_inf
->
place
()),
static_cast
<
const
void
*>
(
found_inf_data
),
sizeof
(
bool
));
}
else
{
cpu_found_inf_data
=
(
*
found_inf_data
);
}
...
...
@@ -94,22 +96,26 @@ class UpdateLossScalingXPUKernel : public framework::OpKernel<T> {
int
cpu_good_in_data
;
MPDType
cpu_pre_loss_scaling_data
;
if
(
platform
::
is_xpu_place
(
bad_in
->
place
()))
{
xpu_memcpy
(
&
cpu_bad_in_data
,
bad_in_data
,
sizeof
(
int
),
XPUMemcpyKind
::
XPU_DEVICE_TO_HOST
);
memory
::
Copy
(
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
&
cpu_bad_in_data
),
BOOST_GET_CONST
(
platform
::
XPUPlace
,
bad_in
->
place
()),
static_cast
<
const
void
*>
(
bad_in_data
),
sizeof
(
int
));
}
else
{
cpu_bad_in_data
=
(
*
bad_in_data
);
}
if
(
platform
::
is_xpu_place
(
good_in
->
place
()))
{
xpu_memcpy
(
&
cpu_good_in_data
,
good_in_data
,
sizeof
(
int
),
XPUMemcpyKind
::
XPU_DEVICE_TO_HOST
);
memory
::
Copy
(
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
&
cpu_good_in_data
),
BOOST_GET_CONST
(
platform
::
XPUPlace
,
good_in
->
place
()),
static_cast
<
const
void
*>
(
good_in_data
),
sizeof
(
int
));
}
else
{
cpu_good_in_data
=
(
*
good_in_data
);
}
if
(
platform
::
is_xpu_place
(
pre_loss_scaling
->
place
()))
{
xpu_memcpy
(
&
cpu_pre_loss_scaling_data
,
pre_loss_scaling_data
,
sizeof
(
MPDType
),
XPUMemcpyKind
::
XPU_DEVICE_TO_HOST
);
memory
::
Copy
(
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
&
cpu_pre_loss_scaling_data
),
BOOST_GET_CONST
(
platform
::
XPUPlace
,
pre_loss_scaling
->
place
()),
static_cast
<
const
void
*>
(
pre_loss_scaling_data
),
sizeof
(
MPDType
));
}
else
{
cpu_pre_loss_scaling_data
=
(
*
pre_loss_scaling_data
);
}
...
...
paddle/fluid/operators/masked_select_op_xpu.cc
浏览文件 @
bdf5834e
...
...
@@ -48,16 +48,9 @@ class MaskedSelectXPUKernel : public framework::OpKernel<T> {
"XPU nonzero_count kernel return wrong value[%d %s]"
,
ret
,
XPUAPIErrorMsg
[
ret
]));
if
(
dev_ctx
.
x_context
()
->
xpu_stream
)
{
dev_ctx
.
Wait
();
}
ret
=
xpu_memcpy
(
static_cast
<
void
*>
(
&
out_size_cpu
),
static_cast
<
const
void
*>
(
out_size
),
sizeof
(
int32_t
),
XPU_DEVICE_TO_HOST
);
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU xpu_memcpy return wrong "
"value[%d %s]"
,
ret
,
XPUAPIErrorMsg
[
ret
]));
memory
::
Copy
(
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
&
out_size_cpu
),
BOOST_GET_CONST
(
platform
::
XPUPlace
,
mask
->
place
()),
static_cast
<
void
*>
(
out_size
),
sizeof
(
int32_t
));
framework
::
DDim
out_dim
{
out_size_cpu
};
out
->
Resize
(
out_dim
);
...
...
paddle/fluid/operators/range_op_xpu.cc
浏览文件 @
bdf5834e
...
...
@@ -50,12 +50,10 @@ class XPURangeKernel : public framework::OpKernel<T> {
out_cpu_data_ptr
[
i
]
=
value
;
value
+=
step
;
}
int
ret
=
xpu_memcpy
(
out_data
,
out_cpu_data_ptr
,
out
->
numel
()
*
sizeof
(
T
),
XPUMemcpyKind
::
XPU_HOST_TO_DEVICE
);
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU xpu_memcpy return wrong "
"value[%d %s]"
,
ret
,
XPUAPIErrorMsg
[
ret
]));
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
XPUPlace
,
context
.
GetPlace
()),
static_cast
<
void
*>
(
out_data
),
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
out_cpu_data_ptr
),
out
->
numel
()
*
sizeof
(
T
));
}
};
...
...
paddle/fluid/operators/where_index_op_xpu.cc
浏览文件 @
bdf5834e
...
...
@@ -43,16 +43,9 @@ class WhereIndexXPUKernel : public framework::OpKernel<T> {
"XPU nonzero_count kernel return wrong value[%d %s] in WhereIndex"
,
ret
,
XPUAPIErrorMsg
[
ret
]));
if
(
dev_ctx
.
x_context
()
->
xpu_stream
)
{
dev_ctx
.
Wait
();
}
ret
=
xpu_memcpy
(
static_cast
<
void
*>
(
&
true_num_cpu
),
static_cast
<
const
void
*>
(
true_num
),
sizeof
(
int32_t
),
XPU_DEVICE_TO_HOST
);
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU xpu_memcpy return wrong "
"value[%d %s]"
,
ret
,
XPUAPIErrorMsg
[
ret
]));
memory
::
Copy
(
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
&
true_num_cpu
),
BOOST_GET_CONST
(
platform
::
XPUPlace
,
context
.
GetPlace
()),
static_cast
<
void
*>
(
true_num
),
sizeof
(
int32_t
));
out
->
Resize
(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
true_num_cpu
),
rank
}));
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
bdf5834e
...
...
@@ -307,8 +307,9 @@ void SetTensorFromPyArrayT(
platform
::
XPUDeviceGuard
guard
(
BOOST_GET_CONST
(
platform
::
XPUPlace
,
tmp_place
).
device
);
auto
dst
=
self
->
mutable_data
<
T
>
(
place
);
xpu_memcpy
(
dst
,
array
.
data
(),
array
.
nbytes
(),
XPUMemcpyKind
::
XPU_HOST_TO_DEVICE
);
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
XPUPlace
,
tmp_place
),
static_cast
<
void
*>
(
dst
),
platform
::
CPUPlace
(),
static_cast
<
const
void
*>
(
array
.
data
()),
array
.
nbytes
());
#else
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"Cannot use XPUPlace in CPU/GPU version, "
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录