Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
52b05bac
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
52b05bac
编写于
3月 31, 2021
作者:
T
taixiurong
提交者:
GitHub
3月 31, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix some bug in transformer training in xpu (#31918)
上级
5394194e
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
354 addition
and
208 deletion
+354
-208
cmake/external/xpu.cmake
cmake/external/xpu.cmake
+1
-1
paddle/fluid/memory/memcpy.cc
paddle/fluid/memory/memcpy.cc
+3
-3
paddle/fluid/operators/cast_op_xpu.cc
paddle/fluid/operators/cast_op_xpu.cc
+34
-6
paddle/fluid/operators/matmul_op_xpu.cc
paddle/fluid/operators/matmul_op_xpu.cc
+53
-24
paddle/fluid/operators/matmul_v2_op_xpu.cc
paddle/fluid/operators/matmul_v2_op_xpu.cc
+43
-19
paddle/fluid/operators/optimizers/adam_op_xpu.cc
paddle/fluid/operators/optimizers/adam_op_xpu.cc
+14
-8
paddle/fluid/operators/reshape_op.cc
paddle/fluid/operators/reshape_op.cc
+3
-25
paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc
paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc
+16
-2
python/paddle/fluid/tests/unittests/test_matmul_op.py
python/paddle/fluid/tests/unittests/test_matmul_op.py
+36
-0
python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py
python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py
+4
-4
python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py
...on/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py
+42
-16
python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py
...paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py
+105
-100
未找到文件。
cmake/external/xpu.cmake
浏览文件 @
52b05bac
...
...
@@ -13,7 +13,7 @@ if(NOT XPU_SDK_ROOT)
elseif
(
WITH_SUNWAY
)
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2021_01_13.tar.gz"
CACHE STRING
""
FORCE
)
else
()
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_0
2_27
.tar.gz"
CACHE STRING
""
FORCE
)
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_0
3_30
.tar.gz"
CACHE STRING
""
FORCE
)
endif
()
SET
(
XPU_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/xpu"
)
...
...
paddle/fluid/memory/memcpy.cc
浏览文件 @
52b05bac
...
...
@@ -40,7 +40,7 @@ void Copy<platform::XPUPlace, platform::CPUPlace>(platform::XPUPlace dst_place,
platform
::
CPUPlace
src_place
,
const
void
*
src
,
size_t
num
)
{
if
(
num
<=
0
)
{
VLOG
(
0
)
<<
"memcpy XPU_HOST_TO_DEVICE size <= 0 ("
<<
num
<<
")"
;
VLOG
(
1
)
<<
"memcpy XPU_HOST_TO_DEVICE size <= 0 ("
<<
num
<<
")"
;
return
;
}
int
dev_id
=
-
1
;
...
...
@@ -86,7 +86,7 @@ void Copy<platform::CPUPlace, platform::XPUPlace>(platform::CPUPlace dst_place,
platform
::
XPUPlace
src_place
,
const
void
*
src
,
size_t
num
)
{
if
(
num
<=
0
)
{
VLOG
(
0
)
<<
"memcpy XPU_DEVICE_TO_HOST size <= 0 ("
<<
num
<<
")"
;
VLOG
(
1
)
<<
"memcpy XPU_DEVICE_TO_HOST size <= 0 ("
<<
num
<<
")"
;
return
;
}
int
dev_id
=
-
1
;
...
...
@@ -132,7 +132,7 @@ void Copy<platform::XPUPlace, platform::XPUPlace>(platform::XPUPlace dst_place,
platform
::
XPUPlace
src_place
,
const
void
*
src
,
size_t
num
)
{
if
(
num
<=
0
)
{
VLOG
(
0
)
<<
"memcpy XPU_DEVICE_TO_DEVICE size <= 0 ("
<<
num
<<
")"
;
VLOG
(
1
)
<<
"memcpy XPU_DEVICE_TO_DEVICE size <= 0 ("
<<
num
<<
")"
;
return
;
}
int
dev_id
=
-
1
;
...
...
paddle/fluid/operators/cast_op_xpu.cc
浏览文件 @
52b05bac
...
...
@@ -23,8 +23,22 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
class
XPUFPTypeTrait
{
public:
using
Type
=
T
;
};
template
<
>
class
XPUFPTypeTrait
<
platform
::
float16
>
{
public:
using
Type
=
float16
;
};
template
<
typename
DeviceContext
,
typename
InT
>
class
CastXPUKernel
:
public
framework
::
OpKernel
<
InT
>
{
using
XPUInTDType
=
typename
XPUFPTypeTrait
<
InT
>::
Type
;
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
in
=
context
.
Input
<
framework
::
Tensor
>
(
"X"
);
...
...
@@ -34,27 +48,39 @@ class CastXPUKernel : public framework::OpKernel<InT> {
auto
out_type
=
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
context
.
Attr
<
int
>
(
"out_dtype"
));
auto
*
in_data
=
in
->
data
<
InT
>
();
// using XPUOutTDType = typename XPUFPTypeTrait<InT>::Type;
auto
numel
=
in
->
numel
();
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
int
r
=
-
1
;
if
(
out_type
==
framework
::
proto
::
VarType
::
FP32
)
{
auto
*
out_data
=
out
->
mutable_data
<
float
>
(
context
.
GetPlace
());
r
=
xpu
::
cast_v2
<
InT
,
float
>
(
dev_ctx
.
x_context
(),
in_data
,
out_data
,
numel
);
r
=
xpu
::
cast_v2
<
XPUInTDType
,
float
>
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUInTDType
*>
(
in_data
),
out_data
,
numel
);
}
else
if
(
out_type
==
framework
::
proto
::
VarType
::
INT32
)
{
auto
*
out_data
=
out
->
mutable_data
<
int
>
(
context
.
GetPlace
());
r
=
xpu
::
cast_v2
<
InT
,
int32_t
>
(
dev_ctx
.
x_context
(),
in_data
,
out_data
,
numel
);
r
=
xpu
::
cast_v2
<
XPUInTDType
,
int32_t
>
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUInTDType
*>
(
in_data
),
out_data
,
numel
);
}
else
if
(
out_type
==
framework
::
proto
::
VarType
::
INT64
)
{
auto
*
out_data
=
out
->
mutable_data
<
int64_t
>
(
context
.
GetPlace
());
r
=
xpu
::
cast_v2
<
InT
,
int64_t
>
(
dev_ctx
.
x_context
(),
in_data
,
out_data
,
numel
);
r
=
xpu
::
cast_v2
<
XPUInTDType
,
int64_t
>
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUInTDType
*>
(
in_data
),
out_data
,
numel
);
}
else
if
((
out_type
==
framework
::
proto
::
VarType
::
BOOL
)
&&
(
in_type
==
framework
::
proto
::
VarType
::
FP32
))
{
auto
*
out_data
=
out
->
mutable_data
<
bool
>
(
context
.
GetPlace
());
r
=
xpu
::
cast_v2
<
float
,
int8_t
>
(
dev_ctx
.
x_context
(),
(
const
float
*
)
in_data
,
reinterpret_cast
<
int8_t
*>
(
out_data
),
numel
);
}
else
if
(
out_type
==
framework
::
proto
::
VarType
::
FP16
)
{
auto
*
out_data
=
out
->
mutable_data
<
paddle
::
platform
::
float16
>
(
context
.
GetPlace
());
r
=
xpu
::
cast_v2
<
XPUInTDType
,
float16
>
(
dev_ctx
.
x_context
(),
reinterpret_cast
<
const
XPUInTDType
*>
(
in_data
),
reinterpret_cast
<
float16
*>
(
out_data
),
numel
);
}
else
{
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Not supported cast %d -> %d"
,
in_type
,
out_type
));
...
...
@@ -75,5 +101,7 @@ namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL
(
cast
,
ops
::
CastXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
int32_t
>
,
ops
::
CastXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
,
ops
::
CastXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
CastXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
int64_t
>
);
#endif
paddle/fluid/operators/matmul_op_xpu.cc
浏览文件 @
52b05bac
...
...
@@ -23,7 +23,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
framework
::
Tensor
;
static
framework
::
DDim
RowMatrixFromVector
(
const
framework
::
DDim
&
x_dim
)
{
...
...
@@ -123,34 +122,47 @@ static void MatMulXPUFunction(const Tensor *x, const Tensor *y, Tensor *out,
mat_dim_b
.
height_
=
mat_dim_b
.
height_
/
mat_dim_b
.
batch_size_
;
}
}
PADDLE_ENFORCE_EQ
(
mat_dim_a
.
width_
,
mat_dim_b
.
height_
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_op, the "
"first tensor width must be same as "
"second tensor height, but received "
"width:%d, height:%d"
,
mat_dim_a
.
width_
,
mat_dim_b
.
height_
));
if
(
mat_dim_a
.
width_
==
mat_dim_b
.
height_
)
{
if
(
mat_dim_a
.
batch_size_
==
0
&&
mat_dim_b
.
batch_size_
==
1
)
{
mat_dim_a
.
batch_size_
=
mat_dim_b
.
batch_size_
=
0
;
}
if
(
mat_dim_a
.
batch_size_
==
1
&&
mat_dim_b
.
batch_size_
==
0
)
{
mat_dim_a
.
batch_size_
=
mat_dim_b
.
batch_size_
=
0
;
}
}
PADDLE_ENFORCE_EQ
(
mat_dim_a
.
width_
,
mat_dim_b
.
height_
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_op, the "
"first tensor width must be same as "
"second tensor height, but received "
"width:%d, height:%d x_dims = %s , y_dims = %s"
,
mat_dim_a
.
width_
,
mat_dim_b
.
height_
,
x_dims
.
to_str
().
c_str
(),
y_dims
.
to_str
().
c_str
()));
PADDLE_ENFORCE_EQ
(
mat_dim_a
.
batch_size_
,
mat_dim_b
.
batch_size_
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_op, the two input"
"tensor batch_size must be same, but received first "
"tensor batch_size:%d, second "
"tensor batch_size:%d"
,
mat_dim_a
.
batch_size_
,
mat_dim_b
.
batch_size_
));
"tensor batch_size:%d, x_dims = %s , y_dims = %s"
,
mat_dim_a
.
batch_size_
,
mat_dim_b
.
batch_size_
,
x_dims
.
to_str
().
c_str
(),
y_dims
.
to_str
().
c_str
()));
T
alpha
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"alpha"
));
float
alpha
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"alpha"
));
float
*
data_c
=
out
->
data
<
T
>
();
T
*
data_c
=
out
->
data
<
T
>
();
int
m
=
mat_dim_a
.
height_
;
int
n
=
mat_dim_b
.
width_
;
int
k
=
mat_dim_a
.
width_
;
int
batch_size
=
mat_dim_a
.
batch_size_
;
int
ldx
=
mat_dim_a
.
trans_
?
m
:
k
;
int
ldy
=
mat_dim_b
.
trans_
?
k
:
n
;
int
ldout
=
n
;
int
batch_size
=
mat_dim_a
.
batch_size_
;
if
(
batch_size
==
0
)
{
int
r
=
xpu
::
fc_fusion
<
float
,
float
,
float
,
FCT
>
(
if
(
batch_size
<=
1
)
{
int
r
=
0
;
r
=
xpu
::
fc_fusion
<
T
,
T
,
T
,
FCT
>
(
dev_ctx
.
x_context
(),
x
->
data
<
T
>
(),
y
->
data
<
T
>
(),
data_c
,
m
,
n
,
k
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
,
nullptr
,
nullptr
,
nullptr
,
ldx
,
ldy
,
ldout
,
alpha
,
0
,
nullptr
,
xpu
::
Activation_t
::
LINEAR
);
...
...
@@ -159,14 +171,32 @@ static void MatMulXPUFunction(const Tensor *x, const Tensor *y, Tensor *out,
"XPU fc_fusion kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
else
{
int
r
=
xpu
::
fc_batched
<
float
,
float
,
float
,
FCT
>
(
dev_ctx
.
x_context
(),
batch_size
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
,
m
,
n
,
k
,
alpha
,
x
->
data
<
T
>
(),
mat_dim_a
.
stride_
,
y
->
data
<
T
>
(),
mat_dim_b
.
stride_
,
0.0
,
data_c
,
m
*
n
,
nullptr
,
nullptr
);
// batch matmul
int
r
=
xpu
::
fc_batched
<
T
,
T
,
T
,
FCT
>
(
dev_ctx
.
x_context
(),
// Context* ctx,
batch_size
,
// int batch_size,
mat_dim_a
.
trans_
,
// bool x_trans,
mat_dim_b
.
trans_
,
// bool w_trans,
m
,
// int m,
n
,
// int n,
k
,
// int k,
alpha
,
// float alpha,
reinterpret_cast
<
const
T
*>
(
x
->
data
<
T
>
()),
// const TX* x,
mat_dim_a
.
stride_
,
// int stride_a,
reinterpret_cast
<
const
T
*>
(
y
->
data
<
T
>
()),
// const TW* w,
mat_dim_b
.
stride_
,
// int stride_b,
0.0
,
// float beta,
reinterpret_cast
<
T
*>
(
data_c
),
// TY* y,
m
*
n
,
// int stride_c,
nullptr
,
// const float* x_maxptr,
nullptr
);
// const float* w_maxptr
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU fc_batched kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
"XPU fc_batched kernel return wrong value[%d %s] "
"x_dims = %s , y_dims = %s"
,
r
,
XPUAPIErrorMsg
[
r
],
x_dims
.
to_str
().
c_str
(),
y_dims
.
to_str
().
c_str
()));
}
}
...
...
@@ -206,9 +236,8 @@ static framework::Tensor XPUFoldHeadAndLastDims(
static_cast
<
int
>
(
in_dims
[
1
]),
static_cast
<
int
>
(
in_dims
[
2
])};
std
::
vector
<
int
>
axis_host
=
{
1
,
0
,
2
};
int
r
=
xpu
::
transpose
(
context
.
x_context
(),
input
.
data
<
T
>
(),
output
.
data
<
T
>
(),
in_shape_host
.
data
(),
axis_host
.
data
(),
/*ndims=*/
3
);
in_shape_host
,
axis_host
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU transpose kernel return wrong value[%d %s]"
,
r
,
...
...
paddle/fluid/operators/matmul_v2_op_xpu.cc
浏览文件 @
52b05bac
...
...
@@ -57,32 +57,55 @@ static void MatMulXPUFunction(const Tensor* x, const Tensor* y, Tensor* out,
PADDLE_ENFORCE_EQ
(
mat_dim_a
.
width_
,
mat_dim_b
.
height_
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_v2_op xdims = %s ydims = %s"
,
x_dims
.
to_str
(),
y_dims
.
to_str
()));
"Shape mistake in matmul_v2_op xdims = %s ydims = %s "
"x_trans = %d y_trans = %d"
,
x_dims
.
to_str
(),
y_dims
.
to_str
(),
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
));
PADDLE_ENFORCE_EQ
(
mat_dim_a
.
batch_size_
,
mat_dim_b
.
batch_size_
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_v2_op xdims = %s ydims = %s"
,
x_dims
.
to_str
(),
y_dims
.
to_str
()));
"Shape mistake in matmul_v2_op xdims = %s ydims = %s "
"x_trans = %d y_trans = %d"
,
x_dims
.
to_str
(),
y_dims
.
to_str
(),
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
));
float
*
data_c
=
out
->
data
<
T
>
();
T
*
data_c
=
out
->
data
<
T
>
();
int
m
=
mat_dim_a
.
height_
;
int
n
=
mat_dim_b
.
width_
;
int
k
=
mat_dim_a
.
width_
;
int
batch_size
=
mat_dim_a
.
batch_size_
;
if
(
batch_size
==
0
)
{
int
r
=
xpu
::
fc
<
float
,
float
,
float
,
FCT
>
(
dev_ctx
.
x_context
(),
x
->
data
<
T
>
(),
y
->
data
<
T
>
(),
data_c
,
m
,
n
,
k
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
,
nullptr
,
nullptr
,
nullptr
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU fc_fusion kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
if
(
batch_size
<=
1
)
{
int
r
=
0
;
r
=
xpu
::
fc
<
T
,
T
,
T
,
FCT
>
(
dev_ctx
.
x_context
(),
x
->
data
<
T
>
(),
y
->
data
<
T
>
(),
data_c
,
m
,
n
,
k
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
,
nullptr
,
nullptr
,
nullptr
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU fc_fusion kernel return wrong value[%d %s] , m = %d, n = "
"%d, "
"k = %d, a_tr = %d, b_tr = %d"
,
r
,
XPUAPIErrorMsg
[
r
],
m
,
n
,
k
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
));
}
else
{
int
r
=
xpu
::
fc_batched
<
float
,
float
,
float
,
FCT
>
(
dev_ctx
.
x_context
(),
batch_size
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
,
m
,
n
,
k
,
1.0
,
x
->
data
<
T
>
(),
mat_dim_a
.
stride_
,
y
->
data
<
T
>
(),
mat_dim_b
.
stride_
,
0.0
,
data_c
,
m
*
n
,
nullptr
,
nullptr
);
// batch matmul
int
r
=
xpu
::
fc_batched
<
T
,
T
,
T
,
FCT
>
(
dev_ctx
.
x_context
(),
// Context* ctx,
batch_size
,
// int batch_size,
mat_dim_a
.
trans_
,
// bool x_trans,
mat_dim_b
.
trans_
,
// bool w_trans,
m
,
// int m,
n
,
// int n,
k
,
// int k,
1.0
,
// float alpha,
reinterpret_cast
<
const
T
*>
(
x
->
data
<
T
>
()),
// const TX* x,
mat_dim_a
.
stride_
,
// int stride_a,
reinterpret_cast
<
const
T
*>
(
y
->
data
<
T
>
()),
// const TW* w,
mat_dim_b
.
stride_
,
// int stride_b,
0.0
,
// float beta,
reinterpret_cast
<
T
*>
(
data_c
),
// TY* y,
m
*
n
,
// int stride_c,
nullptr
,
// const float* x_maxptr,
nullptr
);
// const float* w_maxptr
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU fc_batched kernel return wrong value[%d %s]"
,
r
,
...
...
@@ -125,7 +148,7 @@ static framework::Tensor XPUFoldHeadAndLastDims(
std
::
vector
<
int
>
axis_host
=
{
1
,
0
,
2
};
int
r
=
xpu
::
transpose
(
context
.
x_context
(),
input
.
data
<
T
>
(),
output
.
data
<
T
>
(),
in_shape_host
.
data
(),
axis_host
.
data
(),
/*ndims=*/
3
);
in_shape_host
,
axis_host
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU transpose kernel return wrong value[%d %s]"
,
r
,
...
...
@@ -189,6 +212,7 @@ class MatMulV2XPUGradKernel : public framework::OpKernel<T> {
auto
*
dx
=
context
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
context
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
ReshapeXYOutIntoMatrixSequence
(
&
x
,
&
y
,
&
dout
,
transpose_x
,
transpose_y
);
framework
::
DDim
dx_dims
;
if
(
dx
)
{
dx_dims
=
dx
->
dims
();
...
...
paddle/fluid/operators/optimizers/adam_op_xpu.cc
浏览文件 @
52b05bac
...
...
@@ -121,19 +121,25 @@ class AdamOpXPUKernel : public framework::OpKernel<T> {
}
else
{
T
cpu_beta1_pow_out_data
;
T
cpu_beta2_pow_out_data
;
xpu_memcpy
(
&
cpu_beta1_pow_out_data
,
beta1_pow_ptr
,
sizeof
(
T
),
XPU_DEVICE_TO_HOST
);
memory
::
Copy
(
platform
::
CPUPlace
(),
&
cpu_beta1_pow_out_data
,
BOOST_GET_CONST
(
platform
::
XPUPlace
,
beta1_pow
.
place
()),
beta1_pow_ptr
,
sizeof
(
T
));
cpu_beta1_pow_out_data
=
cpu_beta1_pow_out_data
*
beta1
;
xpu_memcpy
(
&
cpu_beta2_pow_out_data
,
beta2_pow_ptr
,
sizeof
(
T
),
XPU_DEVICE_TO_HOST
);
memory
::
Copy
(
platform
::
CPUPlace
(),
&
cpu_beta2_pow_out_data
,
BOOST_GET_CONST
(
platform
::
XPUPlace
,
beta2_pow
.
place
()),
beta2_pow_ptr
,
sizeof
(
T
));
cpu_beta2_pow_out_data
=
cpu_beta2_pow_out_data
*
beta2
;
T
*
beta1_pow_out_p
=
beta1_pow_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
beta2_pow_out_p
=
beta2_pow_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
xpu_memcpy
(
beta1_pow_out_p
,
&
cpu_beta1_pow_out_data
,
sizeof
(
T
),
XPU_HOST_TO_DEVICE
);
xpu_memcpy
(
beta2_pow_out_p
,
&
cpu_beta2_pow_out_data
,
sizeof
(
T
),
XPU_HOST_TO_DEVICE
);
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
XPUPlace
,
ctx
.
GetPlace
()),
beta1_pow_out_p
,
platform
::
CPUPlace
(),
&
cpu_beta1_pow_out_data
,
sizeof
(
T
));
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
XPUPlace
,
ctx
.
GetPlace
()),
beta2_pow_out_p
,
platform
::
CPUPlace
(),
&
cpu_beta2_pow_out_data
,
sizeof
(
T
));
}
PADDLE_ENFORCE_EQ
(
r
==
xpu
::
Error_t
::
SUCCESS
,
true
,
...
...
paddle/fluid/operators/reshape_op.cc
浏览文件 @
52b05bac
...
...
@@ -377,31 +377,9 @@ class ReshapeKernel {
out
->
Resize
(
out_dims
);
out
->
mutable_data
(
ctx
.
GetPlace
(),
in
->
type
());
#ifdef PADDLE_WITH_XPU
if
(
platform
::
is_xpu_place
(
ctx
.
GetPlace
()))
{
void
*
out_ptr
=
out
->
data
<
void
>
();
const
void
*
in_ptr
=
in
->
data
<
void
>
();
if
((
out_ptr
!=
nullptr
)
&&
(
in_ptr
!=
nullptr
)
&&
(
paddle
::
framework
::
SizeOfType
(
in
->
type
())
>
0
))
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
XPUDeviceContext
>();
int
r
=
xpu
::
memcpy_device
(
dev_ctx
.
x_context
(),
out_ptr
,
in_ptr
,
in
->
numel
()
*
paddle
::
framework
::
SizeOfType
(
in
->
type
()));
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU memcpy_device return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
}
else
{
#endif
framework
::
TensorCopy
(
*
in
,
ctx
.
GetPlace
(),
ctx
.
template
device_context
<
platform
::
DeviceContext
>(),
out
);
#ifdef PADDLE_WITH_XPU
}
#endif
framework
::
TensorCopy
(
*
in
,
ctx
.
GetPlace
(),
ctx
.
template
device_context
<
platform
::
DeviceContext
>(),
out
);
out
->
Resize
(
out_dims
);
}
};
...
...
paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc
浏览文件 @
52b05bac
...
...
@@ -45,11 +45,25 @@ class SoftmaxWithCrossEntropyXPUKernel : public framework::OpKernel<T> {
const
int
n
=
SizeToAxis
(
axis
,
logits
->
dims
());
const
int
d
=
SizeFromAxis
(
axis
,
logits
->
dims
());
std
::
vector
<
int
>
logits_dims
=
framework
::
vectorize
<
int
>
(
logits
->
dims
());
// softmax
auto
&
dev_ctx
=
context
.
template
device_context
<
platform
::
XPUDeviceContext
>();
int
r
=
xpu
::
softmax
(
dev_ctx
.
x_context
(),
logits
->
data
<
float
>
(),
softmax
->
data
<
float
>
(),
logits_dims
,
axis
);
int
r
=
XPU_SUCCESS
;
Tensor
clip_logits
;
int
len
=
logits
->
numel
();
T
*
clip_logits_data
=
clip_logits
.
mutable_data
<
T
>
(
context
.
GetPlace
(),
len
*
sizeof
(
T
));
r
=
xpu
::
clip
(
dev_ctx
.
x_context
(),
logits
->
data
<
float
>
(),
clip_logits_data
,
len
,
-
1e30
,
1e30
);
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
External
(
"XPU kernel error. clip "
"execution not succeed, error code=%d"
,
r
));
r
=
xpu
::
softmax
(
dev_ctx
.
x_context
(),
clip_logits_data
,
softmax
->
data
<
float
>
(),
logits_dims
,
axis
);
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
...
...
python/paddle/fluid/tests/unittests/test_matmul_op.py
浏览文件 @
52b05bac
...
...
@@ -206,6 +206,42 @@ for dim_X in (1, 2, 3):
api_test
(
dim_X
,
dim_Y
,
transose_x
,
transose_y
)
# Test case more batch_size and N, M, K
def
generate_compatible_shapes
(
dim_X
,
dim_Y
,
transpose_X
,
transpose_Y
,
batch_size
):
BATCH_SIZE
=
2
M
=
3
N
=
4
K
=
5
if
(
dim_X
==
1
and
transpose_X
)
or
(
dim_Y
==
1
and
transpose_Y
):
K
=
1
if
dim_X
==
1
:
if
transpose_X
:
shape_X
=
[
M
]
else
:
shape_X
=
[
K
]
if
dim_Y
==
1
:
if
transpose_Y
:
shape_Y
=
[
N
]
else
:
shape_Y
=
[
K
]
if
dim_X
>=
2
:
if
transpose_X
:
shape_X
=
[
K
,
M
]
else
:
shape_X
=
[
M
,
K
]
if
dim_X
==
3
:
shape_X
=
[
BATCH_SIZE
]
+
shape_X
if
dim_Y
>=
2
:
if
transpose_Y
:
shape_Y
=
[
N
,
K
]
else
:
shape_Y
=
[
K
,
N
]
if
dim_Y
==
3
:
shape_Y
=
[
BATCH_SIZE
]
+
shape_Y
return
shape_X
,
shape_Y
# Test case n-dim
def
generate_compatible_shapes
(
dim
,
transpose_X
,
transpose_Y
):
M
=
2
...
...
python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py
浏览文件 @
52b05bac
...
...
@@ -51,10 +51,10 @@ class TestCastOp2(op_test.OpTest):
def
setUp
(
self
):
ipt
=
np
.
random
.
random
(
size
=
[
10
,
10
])
self
.
inputs
=
{
'X'
:
ipt
.
astype
(
'float32'
)}
self
.
outputs
=
{
'Out'
:
ipt
.
astype
(
'float
32
'
)}
self
.
outputs
=
{
'Out'
:
ipt
.
astype
(
'float
16
'
)}
self
.
attrs
=
{
'in_dtype'
:
int
(
core
.
VarDesc
.
VarType
.
FP32
),
'out_dtype'
:
int
(
core
.
VarDesc
.
VarType
.
FP
32
)
'out_dtype'
:
int
(
core
.
VarDesc
.
VarType
.
FP
16
)
}
self
.
op_type
=
'cast'
...
...
@@ -68,10 +68,10 @@ class TestCastOp2(op_test.OpTest):
class
TestCastOp3
(
op_test
.
OpTest
):
def
setUp
(
self
):
ipt
=
np
.
random
.
random
(
size
=
[
10
,
10
])
self
.
inputs
=
{
'X'
:
ipt
.
astype
(
'float
32
'
)}
self
.
inputs
=
{
'X'
:
ipt
.
astype
(
'float
16
'
)}
self
.
outputs
=
{
'Out'
:
ipt
.
astype
(
'float32'
)}
self
.
attrs
=
{
'in_dtype'
:
int
(
core
.
VarDesc
.
VarType
.
FP
32
),
'in_dtype'
:
int
(
core
.
VarDesc
.
VarType
.
FP
16
),
'out_dtype'
:
int
(
core
.
VarDesc
.
VarType
.
FP32
)
}
self
.
op_type
=
'cast'
...
...
python/paddle/fluid/tests/unittests/xpu/test_matmul_op_xpu.py
浏览文件 @
52b05bac
...
...
@@ -27,8 +27,12 @@ from paddle.fluid import Program, program_guard
paddle
.
enable_static
()
def
generate_compatible_shapes
(
dim_X
,
dim_Y
,
transpose_X
,
transpose_Y
):
def
generate_compatible_shapes
(
dim_X
,
dim_Y
,
transpose_X
,
transpose_Y
,
batch_size
):
BATCH_SIZE
=
2
if
batch_size
!=
None
:
BATCH_SIZE
=
batch_size
M
=
3
N
=
4
K
=
5
...
...
@@ -58,6 +62,13 @@ def generate_compatible_shapes(dim_X, dim_Y, transpose_X, transpose_Y):
shape_Y
=
[
K
,
N
]
if
dim_Y
==
3
:
shape_Y
=
[
BATCH_SIZE
]
+
shape_Y
if
dim_Y
==
3
and
dim_X
==
2
:
if
transpose_X
==
False
:
shape_X
[
1
]
=
shape_X
[
1
]
*
BATCH_SIZE
else
:
shape_X
[
0
]
=
shape_X
[
0
]
*
BATCH_SIZE
return
shape_X
,
shape_Y
...
...
@@ -77,11 +88,19 @@ def reference_matmul(X, Y, transpose_X=False, transpose_Y=False):
if
transpose_Y
:
if
Y
.
ndim
==
1
:
Y
=
Y
.
reshape
((
1
,
Y
.
size
))
elif
Y
.
ndim
==
2
:
Y
=
Y
.
T
else
:
dim
=
[
i
for
i
in
range
(
len
(
Y
.
shape
))]
dim
[
-
1
],
dim
[
len
(
Y
.
shape
)
-
2
]
=
dim
[
len
(
Y
.
shape
)
-
2
],
dim
[
-
1
]
Y
=
np
.
transpose
(
Y
,
tuple
(
dim
))
if
X
.
ndim
==
3
and
Y
.
ndim
==
2
:
x_dims
=
X
.
shape
X
=
X
.
reshape
((
x_dims
[
0
]
*
x_dims
[
1
],
x_dims
[
2
]))
if
Y
.
ndim
==
3
and
X
.
ndim
==
2
:
y_dims
=
Y
.
shape
Y
=
Y
.
reshape
((
y_dims
[
0
]
*
y_dims
[
1
],
y_dims
[
2
]))
Out
=
np
.
matmul
(
X
,
Y
)
if
not
Out
.
shape
:
# We do not support 0-dimensional Tensors (scalars). So where
...
...
@@ -203,11 +222,11 @@ def test_negative_dims_program(obj):
# Generate program api cases for all negative possibilities
def
api_test
(
dim_x
,
dim_y
,
trans_x
,
trans_y
):
def
api_test
(
dim_x
,
dim_y
,
trans_x
,
trans_y
,
batch_size
):
test_name
=
(
'TestMatMulAPI_dimX_{}_dim_Y_{}_transX_{}_transY_{}'
.
format
(
dim_x
,
dim_y
,
trans_x
,
trans_y
))
shape_x
,
shape_y
=
generate_compatible_shapes
(
dim_x
,
dim_y
,
trans_x
,
trans_y
)
trans_y
,
batch_size
)
globals
()[
test_name
]
=
type
(
test_name
,
(
unittest
.
TestCase
,
),
{
'shape_X'
:
shape_x
,
'shape_Y'
:
shape_y
,
...
...
@@ -218,29 +237,35 @@ def api_test(dim_x, dim_y, trans_x, trans_y):
# Generate operators cases for all possibilities
def
inject_test
(
dim_x
,
dim_y
,
trans_x
,
trans_y
):
test_name
=
(
'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'
.
format
(
dim_x
,
dim_y
,
trans_x
,
trans_y
))
def
inject_test
(
dim_x
,
dim_y
,
trans_x
,
trans_y
,
batch_size
):
test_name
=
(
'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}_batch_{}'
.
format
(
dim_x
,
dim_y
,
trans_x
,
trans_y
,
batch
))
shape_x
,
shape_y
=
generate_compatible_shapes
(
dim_x
,
dim_y
,
trans_x
,
trans_y
)
trans_y
,
batch_size
)
globals
()[
test_name
]
=
type
(
test_name
,
(
Generator
,
XPUOpTest
),
{
'shape_X'
:
shape_x
,
'shape_Y'
:
shape_y
,
'transpose_X'
:
trans_x
,
'transpose_Y'
:
trans_y
,
'op_type'
:
"matmul"
})
for
dim_X
in
(
1
,
2
,
3
):
for
dim_Y
in
(
1
,
2
,
3
):
transose_x
=
False
transose_y
=
False
if
dim_X
==
3
and
dim_Y
==
3
:
inject_test
(
dim_X
,
dim_Y
,
transose_x
,
transose_y
)
api_test
(
dim_X
,
dim_Y
,
transose_x
,
transose_y
)
xpu_support_dims_list
=
[[
1
,
1
],
[
2
,
2
],
[
3
,
3
]]
batch_size
=
[
2
,
4
,
5
,
10
,
50
,
100
,
300
]
for
dims
in
xpu_support_dims_list
:
dim_X
=
dims
[
0
]
dim_Y
=
dims
[
1
]
for
transose_x
in
(
False
,
True
):
for
transose_y
in
(
False
,
True
):
for
batch
in
batch_size
:
inject_test
(
dim_X
,
dim_Y
,
transose_x
,
transose_y
,
batch
)
# xpu not support all negative possibilities
# api_test(dim_X, dim_Y, False, False, 10)
# Test case n-dim
# Test case n-dim
def
generate_compatible_shapes
(
dim
,
transpose_X
,
transpose_Y
):
M
=
2
N
=
4
...
...
@@ -261,7 +286,7 @@ def generate_compatible_shapes(dim, transpose_X, transpose_Y):
return
shape_X
,
shape_Y
#
#
Test case n-dim
# Test case n-dim
for
dim
in
[
4
]:
for
transpose_X
in
[
False
,
True
]:
for
transpose_Y
in
[
False
,
True
]:
...
...
@@ -275,6 +300,7 @@ for dim in [4]:
'shape_Y'
:
shape_Y
,
'transpose_X'
:
transpose_X
,
'transpose_Y'
:
transpose_Y
,
'op_type'
:
"matmul"
})
...
...
python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py
浏览文件 @
52b05bac
...
...
@@ -45,7 +45,6 @@ def reference_matmul(X, Y, transpose_X=False, transpose_Y=False):
dim
=
[
i
for
i
in
range
(
len
(
Y
.
shape
))]
dim
[
-
1
],
dim
[
len
(
Y
.
shape
)
-
2
]
=
dim
[
len
(
Y
.
shape
)
-
2
],
dim
[
-
1
]
Y
=
np
.
transpose
(
Y
,
tuple
(
dim
))
Out
=
np
.
matmul
(
X
,
Y
)
if
not
Out
.
shape
:
# We do not support 0-dimensional Tensors (scalars). So where
...
...
@@ -98,16 +97,16 @@ class TestMatMulV2Op(XPUOpTest):
self
.
check_grad_with_place
(
place
,
[
'X'
,
'Y'
],
'Out'
)
#
class TestMatMuklOp2(TestMatMulV2Op):
#
"""
#
case 2
#
"""
class
TestMatMuklOp2
(
TestMatMulV2Op
):
"""
case 2
"""
#
def config(self):
# self.x_shape = (100,
)
# self.y_shape = (1, 3, 2, 100
)
#
self.trans_x = False
# self.trans_y = Tru
e
def
config
(
self
):
self
.
x_shape
=
(
100
)
self
.
y_shape
=
(
100
,
3
)
self
.
trans_x
=
False
self
.
trans_y
=
Fals
e
class
TestMatMuklOp3
(
TestMatMulV2Op
):
...
...
@@ -122,16 +121,16 @@ class TestMatMuklOp3(TestMatMulV2Op):
self
.
trans_y
=
False
#
class TestMatMuklOp4(TestMatMulV2Op):
#
"""
#
case 4
#
"""
class
TestMatMuklOp4
(
TestMatMulV2Op
):
"""
case 4
"""
#
def config(self):
# self.x_shape = (100,
)
# self.y_shape = (1, 2, 100, 2
)
#
self.trans_x = False
#
self.trans_y = False
def
config
(
self
):
self
.
x_shape
=
(
1
,
1
,
100
,
1
)
self
.
y_shape
=
(
1
,
100
)
self
.
trans_x
=
False
self
.
trans_y
=
False
class
TestMatMuklOp5
(
TestMatMulV2Op
):
...
...
@@ -146,27 +145,28 @@ class TestMatMuklOp5(TestMatMulV2Op):
self
.
trans_y
=
False
#
class TestMatMuklOp6(TestMatMulV2Op):
#
"""
#
case 6
#
"""
class
TestMatMuklOp6
(
TestMatMulV2Op
):
"""
case 6
"""
#
def config(self):
# self.x_shape = (1, 2, 102, 1
)
# self.y_shape = (102,
)
# self.trans_x = Tru
e
#
self.trans_y = False
def
config
(
self
):
self
.
x_shape
=
(
1
,
2
,
102
,
10
)
self
.
y_shape
=
(
2
,
10
,
111
)
self
.
trans_x
=
Fals
e
self
.
trans_y
=
False
# class TestMatMuklOp7(TestMatMulV2Op):
# """
# case 7
# """
# def config(self):
# self.x_shape = (1, 2, 1, 100)
# self.y_shape = (100, )
# self.trans_x = False
# self.trans_y = False
class
TestMatMuklOp7
(
TestMatMulV2Op
):
"""
case 7
"""
def
config
(
self
):
self
.
x_shape
=
(
1
,
2
,
100
,
1
)
self
.
y_shape
=
(
2
,
100
,
12
)
self
.
trans_x
=
True
self
.
trans_y
=
False
class
TestMatMuklOp8
(
TestMatMulV2Op
):
...
...
@@ -181,49 +181,52 @@ class TestMatMuklOp8(TestMatMulV2Op):
self
.
trans_y
=
False
#
class TestMatMuklOp9(TestMatMulV2Op):
#
"""
#
case 9
#
"""
class
TestMatMuklOp9
(
TestMatMulV2Op
):
"""
case 9
"""
#
def config(self):
# self.x_shape = (1, 1, 1
, 100)
# self.y_shape = (2, 1, 2
, 100)
#
self.trans_x = False
#
self.trans_y = True
def
config
(
self
):
self
.
x_shape
=
(
100
,
20
,
100
)
self
.
y_shape
=
(
100
,
100
,
100
)
self
.
trans_x
=
False
self
.
trans_y
=
True
# class TestMatMuklOp10(TestMatMulV2Op):
# """
# case 10
# """
# def config(self):
# self.x_shape = (1, 1, 25, 4)
# self.y_shape = (1, 2, 4, 25)
# self.trans_x = False
# self.trans_y = False
class
TestMatMuklOp10
(
TestMatMulV2Op
):
"""
case 10
"""
# class TestMatMuklOp11(TestMatMulV2Op):
# """
# case 11
# """
def
config
(
self
):
self
.
x_shape
=
(
100
,
20
,
100
)
self
.
y_shape
=
(
100
,
20
,
100
)
self
.
trans_x
=
True
self
.
trans_y
=
False
# def config(self):
# self.x_shape = (2, 1, 2, 100)
# self.y_shape = (1, 1, 100, 2)
# self.trans_x = False
# self.trans_y = False
# class TestMatMuklOp12
(TestMatMulV2Op):
#
"""
# case 12
#
"""
class
TestMatMuklOp11
(
TestMatMulV2Op
):
"""
case 11
"""
# def config(self):
# self.x_shape = (2, 1, 4, 25)
# self.y_shape = (1, 1, 4, 25)
# self.trans_x = True
# self.trans_y = False
def
config
(
self
):
self
.
x_shape
=
(
2
,
20
,
100
)
self
.
y_shape
=
(
100
,
30
)
self
.
trans_x
=
False
self
.
trans_y
=
False
class
TestMatMuklOp12
(
TestMatMulV2Op
):
"""
case 12
"""
def
config
(
self
):
self
.
x_shape
=
(
1
,
20
,
100
)
self
.
y_shape
=
(
100
,
)
self
.
trans_x
=
False
self
.
trans_y
=
False
class
TestMatMuklOp13
(
TestMatMulV2Op
):
...
...
@@ -238,38 +241,40 @@ class TestMatMuklOp13(TestMatMulV2Op):
self
.
trans_y
=
False
#
class TestMatMuklOp14(TestMatMulV2Op):
#
"""
#
case 14_1
#
"""
class
TestMatMuklOp14
(
TestMatMulV2Op
):
"""
case 14_1
"""
#
def config(self):
# self.x_shape = (3, 1, 6, 6
)
# self.y_shape = (1, 2, 6, 9
)
# self.trans_x = Tru
e
#
self.trans_y = False
def
config
(
self
):
self
.
x_shape
=
(
100
,
2
,
100
,
10
)
self
.
y_shape
=
(
100
,
2
,
10
,
90
)
self
.
trans_x
=
Fals
e
self
.
trans_y
=
False
# class TestMatMuklOp15(TestMatMulV2Op):
# """
# case 14_2
# """
# def config(self):
# self.x_shape = (3, 1, 6, 6)
# self.y_shape = (1, 2, 6, 9)
# self.trans_x = False
# self.trans_y = False
class
TestMatMuklOp15
(
TestMatMulV2Op
):
"""
case 14_2
"""
# class TestMatMuklOp16(TestMatMulV2Op):
# """
# case 16 : to check the gradient for special case
# """
def
config
(
self
):
self
.
x_shape
=
(
100
,
2
,
100
,
10
)
self
.
y_shape
=
(
100
,
2
,
100
,
10
)
self
.
trans_x
=
False
self
.
trans_y
=
True
# def config(self):
# self.x_shape = (100)
# self.y_shape = (1, 2, 2, 100, 2)
# self.trans_x = False
# self.trans_y = False
class
TestMatMuklOp16
(
TestMatMulV2Op
):
"""
case 16 : to check the big data
"""
def
config
(
self
):
self
.
x_shape
=
(
1000
,
2
,
100
,
100
)
self
.
y_shape
=
(
1000
,
2
,
100
,
900
)
self
.
trans_x
=
False
self
.
trans_y
=
False
class
TestMatMuklOp17
(
TestMatMulV2Op
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录