BaiXuePrincess / Paddle (fork of PaddlePaddle / Paddle)
Commit 0d8ddf9f (unverified)
Authored Dec 07, 2022 by zhangyikun02; committed by GitHub on Dec 07, 2022

modify d2d copy to xpu::copy in xpu kernel, test=kunlun (#48710)

Parent: 57ad9b46
Showing 10 changed files with 91 additions and 33 deletions (+91 -33)
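All of the kernel-side changes below converge on one pattern: a same-device tensor copy that previously went through phi::Copy or paddle::memory::Copy is now issued directly as an XDNN copy on the device context's stream and checked with PADDLE_ENFORCE_XDNN_SUCCESS. As orientation before the per-file diffs, here is a minimal sketch of that shared pattern. It is not code from the commit; it assumes the usual Paddle XPU headers and the `xpu` alias for `baidu::xpu::api`, and `CopyOnDevice` and its arguments are illustrative names only.

// Illustrative sketch (not from the commit): device-to-device copy via the
// XDNN copy primitive, as the XPU kernels in this commit now do.
#include "paddle/phi/backends/xpu/enforce_xpu.h"  // PADDLE_ENFORCE_XDNN_SUCCESS
#include "paddle/phi/core/dense_tensor.h"

template <typename T, typename Context>
void CopyOnDevice(const Context& ctx,
                  const phi::DenseTensor& src,
                  phi::DenseTensor* dst) {
  dst->Resize(src.dims());                 // shape the destination like the source
  ctx.template Alloc<T>(dst);              // allocate the output on the XPU
  int r = xpu::copy(ctx.x_context(),       // runs on the context's stream
                    src.data<T>(),
                    dst->data<T>(),
                    src.numel());
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");  // surface a non-zero XDNN status
}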
paddle/fluid/operators/reader/buffered_reader.cc                 +6  -1
paddle/phi/backends/xpu/xpu_info.cc                              +6  -13
paddle/phi/kernels/reshape_grad_kernel.cc                        +22 -0
paddle/phi/kernels/reshape_kernel.cc                             +30 -0
paddle/phi/kernels/xpu/gather_nd_kernel.cc                       +5  -7
paddle/phi/kernels/xpu/generate_proposals_kernel.cc              +10 -10
paddle/phi/kernels/xpu/scatter_kernel.cc                         +4  -1
paddle/phi/kernels/xpu/tile_kernel.cc                            +5  -1
python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py   +2  -0
python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py  +1  -0
paddle/fluid/operators/reader/buffered_reader.cc

@@ -417,8 +417,13 @@ void BufferedReader::ReadAsync(size_t i) {
       // TODO(zhanghuan) for now hardware not support xpu_memcpy_async, maybe
       // KL3
       if ((platform::is_xpu_place(cpu_place))) {
-        memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size);
         platform::XPUStreamSync(stream_.get());
+        char* tmp = new char[size];
+        PADDLE_ENFORCE_XPU_SUCCESS(xpu_memcpy(
+            tmp, cpu_ptr, size, XPUMemcpyKind::XPU_DEVICE_TO_HOST));
+        PADDLE_ENFORCE_XPU_SUCCESS(xpu_memcpy(
+            xpu_ptr, tmp, size, XPUMemcpyKind::XPU_HOST_TO_DEVICE));
+        delete[] tmp;
       } else {
         memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size);
       }
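The reader path above goes the opposite direction from the kernels: instead of one device-side copy it now stages the transfer through a temporary host buffer with two blocking xpu_memcpy calls after synchronizing its own stream. Isolated from BufferedReader, the staging pattern is roughly the sketch below; the function and parameter names are placeholders, and a std::vector stands in for the raw new[]/delete[] used in the diff.

#include <vector>

// Sketch (not from the commit): a device-to-device copy staged through host
// memory, mirroring what ReadAsync now does when both buffers live on the XPU.
void StagedD2DCopy(void* xpu_dst, const void* xpu_src, size_t size) {
  std::vector<char> staging(size);  // temporary host-side buffer
  PADDLE_ENFORCE_XPU_SUCCESS(xpu_memcpy(
      staging.data(), xpu_src, size, XPUMemcpyKind::XPU_DEVICE_TO_HOST));
  PADDLE_ENFORCE_XPU_SUCCESS(xpu_memcpy(
      xpu_dst, staging.data(), size, XPUMemcpyKind::XPU_HOST_TO_DEVICE));
}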
paddle/phi/backends/xpu/xpu_info.cc

@@ -169,19 +169,12 @@ void MemcpySyncD2D(void* dst,
                    const phi::XPUContext& dev_ctx) {
   int dev_id = GetXPUCurrentDeviceId();
   if (dst_place.device == dev_id && src_place.device == dev_id) {
-    dev_ctx.Wait();
-    char* tmp = new char[count];
-    PADDLE_ENFORCE_XPU_SUCCESS(
-        xpu_memcpy(tmp, src, count, XPUMemcpyKind::XPU_DEVICE_TO_HOST));
-    PADDLE_ENFORCE_XPU_SUCCESS(
-        xpu_memcpy(dst, tmp, count, XPUMemcpyKind::XPU_HOST_TO_DEVICE));
-    delete[] tmp;
-    // PADDLE_ENFORCE_XDNN_SUCCESS(
-    //     baidu::xpu::api::copy(dev_ctx.x_context(),
-    //                           static_cast<const int8_t*>(src),
-    //                           static_cast<int8_t*>(dst),
-    //                           count),
-    //     "copy ");
+    PADDLE_ENFORCE_XDNN_SUCCESS(
+        baidu::xpu::api::copy(dev_ctx.x_context(),
+                              static_cast<const int8_t*>(src),
+                              static_cast<int8_t*>(dst),
+                              count),
+        "copy ");
   } else {
     PADDLE_ENFORCE_XPU_SUCCESS(
         xpu_memcpy_peer(dst_place.device, dst, src_place.device, src, count));
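One difference from the kernel diffs below is worth noting: MemcpySyncD2D drives the copy over raw bytes, casting the pointers to int8_t* and passing count in bytes, while the kernels pass typed data<T>() pointers with numel() elements. A minimal sketch of the byte-wise form follows; the wrapper name and signature are illustrative and not part of the commit.

// Sketch (not from the commit): byte-wise XDNN copy as used in the new
// MemcpySyncD2D body; `count` is a size in bytes, not an element count.
int CopyBytesOnDevice(baidu::xpu::api::Context* ctx,
                      void* dst,
                      const void* src,
                      size_t count) {
  return baidu::xpu::api::copy(ctx,
                               static_cast<const int8_t*>(src),
                               static_cast<int8_t*>(dst),
                               count);
}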
paddle/phi/kernels/reshape_grad_kernel.cc

@@ -17,6 +17,9 @@
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
+#ifdef PADDLE_WITH_XPU
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#endif
 
 namespace phi {
 
@@ -29,6 +32,25 @@ void ReshapeGradKernel(const Context& dev_ctx,
   x_grad->Resize(x_dims);
 }
 
+#ifdef PADDLE_WITH_XPU
+template <>
+void ReshapeGradKernel<phi::XPUContext>(const XPUContext& dev_ctx,
+                                        const DenseTensor& out_grad,
+                                        DenseTensor* x_grad) {
+  auto x_dims = x_grad->dims();
+  dev_ctx.Alloc(x_grad, out_grad.dtype());
+  auto* src_ptr = out_grad.data();
+  auto* dst_ptr = x_grad->data();
+  auto size = out_grad.numel() * paddle::experimental::SizeOf(out_grad.dtype());
+  int ret = xpu::copy(dev_ctx.x_context(),
+                      reinterpret_cast<const int8_t*>(src_ptr),
+                      reinterpret_cast<int8_t*>(dst_ptr),
+                      size);
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
+  x_grad->Resize(x_dims);
+}
+#endif
+
 template <typename Context>
 void ReshapeDoubleGradKernel(const Context& dev_ctx,
                              const DenseTensor& out_grad,
paddle/phi/kernels/reshape_kernel.cc

@@ -19,6 +19,9 @@
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/infermeta/unary.h"
 #include "paddle/phi/kernels/funcs/common_shape.h"
+#ifdef PADDLE_WITH_XPU
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#endif
 
 namespace phi {
 
@@ -42,6 +45,33 @@ void ReshapeKernel(const Context& dev_ctx,
   out->ResetLoD(x.lod());
 }
 
+#ifdef PADDLE_WITH_XPU
+template <>
+void ReshapeKernel<phi::XPUContext>(const XPUContext& dev_ctx,
+                                    const DenseTensor& x,
+                                    const IntArray& shape,
+                                    DenseTensor* out) {
+  MetaTensor meta_out(out);
+  InferMetaFromVecValue(x, shape.GetData(), &meta_out);
+  if (x.initialized() && x.Holder() == out->Holder()) {
+    dev_ctx.Alloc(out, x.dtype());
+    return;
+  }
+  dev_ctx.Alloc(out, x.dtype());
+  auto dims = out->dims();
+  auto* src_ptr = x.data();
+  auto* dst_ptr = out->data();
+  auto size = x.numel() * paddle::experimental::SizeOf(x.dtype());
+  int ret = xpu::copy(dev_ctx.x_context(),
+                      reinterpret_cast<const int8_t*>(src_ptr),
+                      reinterpret_cast<int8_t*>(dst_ptr),
+                      size);
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
+  out->Resize(dims);
+  out->ResetLoD(x.lod());
+}
+#endif
+
 template <typename Context>
 void ReshapeWithXShape(const Context& dev_ctx,
                        const DenseTensor& x,
paddle/phi/kernels/xpu/gather_nd_kernel.cc

@@ -30,7 +30,10 @@ void GatherNdKernel(const Context &ctx,
   if (x.numel() == 0) return;
 
   if (index.numel() == 0) {
-    phi::Copy(ctx, x, phi::XPUPlace(), true, out);
+    out->Resize(x.dims());
+    ctx.template Alloc<T>(out);
+    int r = xpu::copy(ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
     return;
   }
 
@@ -69,12 +72,7 @@ void GatherNdKernel(const Context &ctx,
                             x_vec,
                             index_shape);
   }
-  PADDLE_ENFORCE_EQ(
-      ret,
-      XPU_SUCCESS,
-      phi::errors::External("XPU gather_nd kernel return wrong value[%d %s]",
-                            ret,
-                            XPUAPIErrorMsg[ret]));
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "gather_nd");
 }
 
 }  // namespace phi
paddle/phi/kernels/xpu/generate_proposals_kernel.cc

@@ -372,16 +372,16 @@ void GenerateProposalsKernel(const Context& dev_ctx,
     DenseTensor& proposals = tensor_pair.first;
     DenseTensor& nscores = tensor_pair.second;
 
-    paddle::memory::Copy(place,
-                         rpn_rois->data<T>() + num_proposals * 4,
-                         place,
-                         proposals.data<T>(),
-                         sizeof(T) * proposals.numel());
+    r = xpu::copy(dev_ctx.x_context(),
+                  proposals.data<T>(),
+                  rpn_rois->data<T>() + num_proposals * 4,
+                  proposals.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
-    paddle::memory::Copy(place,
-                         rpn_roi_probs->data<T>() + num_proposals,
-                         place,
-                         nscores.data<T>(),
-                         sizeof(T) * scores.numel());
+    r = xpu::copy(dev_ctx.x_context(),
+                  nscores.data<T>(),
+                  rpn_roi_probs->data<T>() + num_proposals,
+                  nscores.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
 
     if (dev_ctx.x_context()->xpu_stream) {
       dev_ctx.Wait();
paddle/phi/kernels/xpu/scatter_kernel.cc

@@ -27,7 +27,10 @@ void ScatterKernel(const Context &ctx,
                    const DenseTensor &updates,
                    bool overwrite,
                    DenseTensor *out) {
-  phi::Copy(ctx, x, ctx.GetPlace(), false, out);
+  out->Resize(x.dims());
+  ctx.template Alloc<T>(out);
+  int ret = xpu::copy(ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
 
   // Apply ScatterUpdate: Out[index] = Updates[:]
   const auto &index_type = index.dtype();
   bool index_type_match =
paddle/phi/kernels/xpu/tile_kernel.cc

@@ -102,7 +102,11 @@ void TileKernel(const Context& dev_ctx,
   std::vector<int64_t> temp(repeat_times.size(), 1);
   if (repeat_times == temp) {
-    phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out);
+    out->Resize(x.dims());
+    dev_ctx.template Alloc<T>(out);
+    int r = xpu::copy(
+        dev_ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
     return;
   }
python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py

@@ -94,6 +94,8 @@ xpu_test_op_type_white_list = [
     "c_embedding_float32",  # unittests of collective ops do not using xpu testing framework
     "c_sync_comm_stream_float32",
     "c_sync_calc_stream_float32",
+    "reshape2_bool",
+    "reshape2_grad_bool",
 ]
 xpu_test_device_op_white_list = []
 xpu_test_device_op_type_white_list = []
...
python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py
浏览文件 @
0d8ddf9f
...
@@ -41,6 +41,7 @@ class XPUTestReshapeOp(XPUOpTestWrapper):
...
@@ -41,6 +41,7 @@ class XPUTestReshapeOp(XPUOpTestWrapper):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
init_data
()
self
.
init_data
()
self
.
op_type
=
"reshape2"
self
.
op_type
=
"reshape2"
self
.
dtype
=
self
.
in_type
self
.
init_test_input
()
self
.
init_test_input
()
self
.
init_test_output
()
self
.
init_test_output
()
self
.
init_attrs
()
self
.
init_attrs
()
...
...