Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
6a3c8725
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6a3c8725
编写于
1月 14, 2021
作者:
T
taixiurong
提交者:
GitHub
1月 14, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support transformer v2.0 (#30381)
上级
e85be1b1
变更
10
隐藏空白更改
内联
并排
Showing
10 changed files
with
693 additions
and
544 deletions
+693
-544
cmake/external/xpu.cmake
cmake/external/xpu.cmake
+1
-1
paddle/fluid/operators/layer_norm_op_xpu.cc
paddle/fluid/operators/layer_norm_op_xpu.cc
+13
-16
paddle/fluid/operators/matmul_op_xpu.cc
paddle/fluid/operators/matmul_op_xpu.cc
+90
-139
paddle/fluid/operators/matmul_v2_op_xpu.cc
paddle/fluid/operators/matmul_v2_op_xpu.cc
+159
-274
paddle/fluid/operators/one_hot_op_xpu.cc
paddle/fluid/operators/one_hot_op_xpu.cc
+1
-1
paddle/fluid/operators/one_hot_v2_op_xpu.cc
paddle/fluid/operators/one_hot_v2_op_xpu.cc
+70
-0
paddle/fluid/operators/scale_op_xpu.cc
paddle/fluid/operators/scale_op_xpu.cc
+7
-4
paddle/fluid/operators/softmax_op_xpu.cc
paddle/fluid/operators/softmax_op_xpu.cc
+15
-2
python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py
...paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py
+141
-107
python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py
...addle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py
+196
-0
未找到文件。
cmake/external/xpu.cmake
浏览文件 @
6a3c8725
...
...
@@ -10,7 +10,7 @@ if (WITH_AARCH64)
elseif
(
WITH_SUNWAY
)
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2020_1227.tar.gz"
CACHE STRING
""
FORCE
)
else
()
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_01
05
.tar.gz"
CACHE STRING
""
FORCE
)
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_01
_13
.tar.gz"
CACHE STRING
""
FORCE
)
endif
()
SET
(
XPU_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/xpu"
)
...
...
paddle/fluid/operators/layer_norm_op_xpu.cc
浏览文件 @
6a3c8725
...
...
@@ -45,15 +45,13 @@ class LayerNormXPUKernel : public framework::OpKernel<T> {
auto
*
mean_data
=
mean
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
variance_data
=
variance
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
int
r
=
xpu
::
layer_norm
(
dev_ctx
.
x_context
(),
left
,
right
,
x_data
,
y_data
,
scale_data
,
bias_data
,
epsilon
,
mean_data
,
variance_data
,
false
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API(layer_norm) return wrong "
"value[%d], please check whether Baidu "
"Kunlun Card is properly installed."
,
r
));
int
r
=
xpu
::
layer_norm
(
dev_ctx
.
x_context
(),
x_data
,
y_data
,
left
,
right
,
epsilon
,
scale_data
,
bias_data
,
mean_data
,
variance_data
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU layer_norm kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
};
...
...
@@ -87,15 +85,14 @@ class LayerNormGradXPUKernel : public framework::OpKernel<T> {
auto
*
dx_data
=
(
dx
==
nullptr
?
nullptr
:
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
int
r
=
xpu
::
layer_norm_
backward
(
dev_ctx
.
x_context
(),
left
,
right
,
x_data
,
scale_data
,
variance
_data
,
mean_data
,
dy_data
,
dx_data
,
dscale_data
,
dbias_data
,
epsilon
);
int
r
=
xpu
::
layer_norm_
grad
(
dev_ctx
.
x_context
(),
x_data
,
dy_data
,
dx_data
,
left
,
right
,
epsilon
,
scale_data
,
mean
_data
,
variance_data
,
dscale_data
,
dbias_data
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API(layer_norm_backward) return wrong "
"value[%d], please check whether Baidu "
"Kunlun Card is properly installed."
,
r
));
platform
::
errors
::
External
(
"XPU layer_norm_grad kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
};
...
...
paddle/fluid/operators/matmul_op_xpu.cc
浏览文件 @
6a3c8725
...
...
@@ -24,6 +24,8 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
framework
::
Tensor
;
static
framework
::
DDim
RowMatrixFromVector
(
const
framework
::
DDim
&
x_dim
)
{
if
(
x_dim
.
size
()
>
1
)
{
return
x_dim
;
...
...
@@ -97,6 +99,86 @@ static void ReshapeXYOutIntoMatrixSequence(framework::Tensor *x,
ReshapeTensorIntoMatrixSequence
(
y
,
mat_dim_y
);
}
/**
 * Computes out = alpha * op(x) * op(y) on an XPU device through
 * xpu::fc_fusion, where op() optionally transposes its operand.
 *
 * @tparam T       Element type of the tensors. The pointers handed to the
 *                 XPU API are typed `float*`, so T is presumably float —
 *                 TODO confirm against the kernel registration.
 * @tparam FCT     Fourth template argument forwarded to xpu::fc_fusion
 *                 (callers in this file pass int16_t or int32_t).
 * @param x        Left operand.
 * @param y        Right operand.
 * @param out      Result tensor; its buffer is read via out->data<T>(), so it
 *                 must already be allocated by the caller.
 * @param trans_x  Whether x is treated as transposed.
 * @param trans_y  Whether y is treated as transposed.
 * @param ctx      Execution context; supplies the XPU device context and the
 *                 "alpha" attribute.
 *
 * Raises InvalidArgument (via PADDLE_ENFORCE_EQ) when the inner dimensions or
 * the batch sizes of the two descriptors disagree, and External when the XPU
 * API does not return XPU_SUCCESS.
 */
template <typename T, typename FCT>
static void MatMulXPUFunction(const Tensor* x, const Tensor* y, Tensor* out,
                              bool trans_x, bool trans_y,
                              const paddle::framework::ExecutionContext& ctx) {
  const auto& x_dims = x->dims();
  const auto& y_dims = y->dims();
  auto& dev_ctx =
      ctx.template device_context<paddle::platform::XPUDeviceContext>();

  auto mat_dim_a = math::CreateMatrixDescriptor(RowMatrixFromVector(x_dims), 0,
                                                trans_x);
  auto mat_dim_b = math::CreateMatrixDescriptor(ColumnMatrixFromVector(y_dims),
                                                0, trans_y);

  if (x_dims.size() == 3 && y_dims.size() <= 2) {
    if (!trans_x) {
      // Fold the batch dimension of x into its height so the whole product
      // becomes one non-batched call; transposing x instead would be costly.
      mat_dim_a.height_ *= mat_dim_a.batch_size_;
      mat_dim_a.batch_size_ = 0;
    } else {
      // x stays batched: give y the same batch count and split its height
      // across the batches.
      mat_dim_b.batch_size_ = mat_dim_a.batch_size_;
      mat_dim_b.height_ = mat_dim_b.height_ / mat_dim_b.batch_size_;
    }
  }

  PADDLE_ENFORCE_EQ(
      mat_dim_a.width_, mat_dim_b.height_,
      platform::errors::InvalidArgument("Shape mistake in matmul_op, the "
                                        "first tensor width must be same as "
                                        "second tensor height, but received "
                                        "width:%d, height:%d",
                                        mat_dim_a.width_, mat_dim_b.height_));
  // Note the trailing space after "input": adjacent string literals are
  // concatenated verbatim, and without it the message read "inputtensor".
  PADDLE_ENFORCE_EQ(mat_dim_a.batch_size_, mat_dim_b.batch_size_,
                    platform::errors::InvalidArgument(
                        "Shape mistake in matmul_op, the two input "
                        "tensor batch_size must be same, but received first "
                        "tensor batch_size:%d, second "
                        "tensor batch_size:%d",
                        mat_dim_a.batch_size_, mat_dim_b.batch_size_));

  T alpha = static_cast<T>(ctx.Attr<float>("alpha"));

  // Base pointers are fetched once; the batched path only adds offsets.
  const float* data_a = x->data<T>();
  const float* data_b = y->data<T>();
  float* data_c = out->data<T>();

  int m = mat_dim_a.height_;
  int n = mat_dim_b.width_;
  int k = mat_dim_a.width_;
  // Leading dimensions depend on whether each operand is stored transposed.
  int ldx = mat_dim_a.trans_ ? m : k;
  int ldy = mat_dim_b.trans_ ? k : n;
  int ldout = n;
  int batch_size = mat_dim_a.batch_size_;

  // Single place that issues the XPU call and validates its status, shared by
  // the non-batched and the batched paths.
  // NOTE(review): the nullptr / 0 arguments are forwarded exactly as in the
  // original call; their meaning is defined by the XPU API — confirm there.
  const auto run_fc_fusion = [&](const float* a_ptr, const float* b_ptr,
                                 float* c_ptr) {
    int r = xpu::fc_fusion<float, float, float, FCT>(
        dev_ctx.x_context(), a_ptr, b_ptr, c_ptr, m, n, k, mat_dim_a.trans_,
        mat_dim_b.trans_, nullptr, nullptr, nullptr, ldx, ldy, ldout, alpha, 0,
        nullptr, xpu::Activation_t::LINEAR);
    PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                      platform::errors::External(
                          "XPU fc_fusion kernel return wrong value[%d %s]", r,
                          XPUAPIErrorMsg[r]));
  };

  if (batch_size == 0) {
    run_fc_fusion(data_a, data_b, data_c);
  } else {
    // Batched matmul: one fc_fusion call per batch entry, each operating on
    // the slice selected by the descriptor strides.
    int x_stride = mat_dim_a.stride_;
    int y_stride = mat_dim_b.stride_;
    int out_stride = m * n;
    for (int i = 0; i < batch_size; ++i) {
      run_fc_fusion(data_a + x_stride * i, data_b + y_stride * i,
                    data_c + out_stride * i);
    }
  }
}
template
<
typename
DeviceContext
,
typename
T
>
class
MatMulXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -105,78 +187,12 @@ class MatMulXPUKernel : public framework::OpKernel<T> {
auto
*
y
=
context
.
Input
<
framework
::
Tensor
>
(
"Y"
);
auto
*
out
=
context
.
Output
<
framework
::
Tensor
>
(
"Out"
);
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
mat_dim_a
=
math
::
CreateMatrixDescriptor
(
RowMatrixFromVector
(
x
->
dims
()),
0
,
context
.
Attr
<
bool
>
(
"transpose_X"
));
auto
mat_dim_b
=
math
::
CreateMatrixDescriptor
(
ColumnMatrixFromVector
(
y
->
dims
()),
0
,
context
.
Attr
<
bool
>
(
"transpose_Y"
));
const
auto
&
x_dims
=
x
->
dims
();
const
auto
&
y_dims
=
y
->
dims
();
if
(
x_dims
.
size
()
==
3
&&
y_dims
.
size
()
<=
2
)
{
// if transpose_X is true, the transpose cost much time
if
(
!
context
.
Attr
<
bool
>
(
"transpose_X"
))
{
mat_dim_a
.
height_
*=
mat_dim_a
.
batch_size_
;
mat_dim_a
.
batch_size_
=
0
;
}
else
{
mat_dim_b
.
batch_size_
=
mat_dim_a
.
batch_size_
;
mat_dim_b
.
height_
=
mat_dim_b
.
height_
/
mat_dim_b
.
batch_size_
;
}
}
PADDLE_ENFORCE_EQ
(
mat_dim_a
.
width_
,
mat_dim_b
.
height_
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_op, the "
"first tensor width must be same as "
"second tensor height, but received "
"width:%d, height:%d"
,
mat_dim_a
.
width_
,
mat_dim_b
.
height_
));
PADDLE_ENFORCE_EQ
(
mat_dim_a
.
batch_size_
,
mat_dim_b
.
batch_size_
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_op, the two input"
"tensor batch_size must be same, but received first "
"tensor batch_size:%d, second "
"tensor batch_size:%d"
,
mat_dim_a
.
batch_size_
,
mat_dim_b
.
batch_size_
));
T
alpha
=
static_cast
<
T
>
(
context
.
Attr
<
float
>
(
"alpha"
));
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
float
*
data_c
=
out
->
data
<
T
>
();
int
m
=
mat_dim_a
.
height_
;
int
n
=
mat_dim_b
.
width_
;
int
k
=
mat_dim_a
.
width_
;
int
ldx
=
mat_dim_a
.
trans_
?
m
:
k
;
int
ldy
=
mat_dim_b
.
trans_
?
k
:
n
;
int
ldout
=
n
;
int
batch_size
=
mat_dim_a
.
batch_size_
;
if
(
batch_size
==
0
||
batch_size
==
1
)
{
int
r
=
xpu
::
fc_fusion
<
float
,
float
,
float
,
int16_t
>
(
dev_ctx
.
x_context
(),
x
->
data
<
T
>
(),
y
->
data
<
T
>
(),
data_c
,
m
,
n
,
k
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
,
nullptr
,
nullptr
,
nullptr
,
ldx
,
ldy
,
ldout
,
alpha
,
0
,
nullptr
,
xpu
::
Activation_t
::
LINEAR
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU fc_fusion kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
bool
trans_x
=
context
.
Attr
<
bool
>
(
"transpose_X"
);
bool
trans_y
=
context
.
Attr
<
bool
>
(
"transpose_Y"
);
if
(
std
::
getenv
(
"XPU_PADDLE_MAT_MUL_FCINT32"
)
!=
nullptr
)
{
MatMulXPUFunction
<
T
,
int32_t
>
(
x
,
y
,
out
,
trans_x
,
trans_y
,
context
);
}
else
{
// batch matmul
int
x_stride
=
mat_dim_a
.
stride_
;
int
y_stride
=
mat_dim_b
.
stride_
;
int
out_stride
=
m
*
n
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
const
float
*
x_data
=
x
->
data
<
T
>
()
+
x_stride
*
i
;
const
float
*
y_data
=
y
->
data
<
T
>
()
+
y_stride
*
i
;
float
*
out_data
=
data_c
+
out_stride
*
i
;
int
r
=
xpu
::
fc_fusion
<
float
,
float
,
float
,
int16_t
>
(
dev_ctx
.
x_context
(),
x_data
,
y_data
,
out_data
,
m
,
n
,
k
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
,
nullptr
,
nullptr
,
nullptr
,
ldx
,
ldy
,
ldout
,
alpha
,
0
,
nullptr
,
xpu
::
Activation_t
::
LINEAR
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU fc_fusion kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
MatMulXPUFunction
<
T
,
int16_t
>
(
x
,
y
,
out
,
trans_x
,
trans_y
,
context
);
}
}
};
...
...
@@ -244,75 +260,10 @@ class MatMulGradXPUKernel : public framework::OpKernel<T> {
const
framework
::
Tensor
&
b
,
bool
trans_b
,
framework
::
Tensor
*
out
)
const
{
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
mat_dim_a
=
math
::
CreateMatrixDescriptor
(
a
.
dims
(),
0
,
trans_a
);
auto
mat_dim_b
=
math
::
CreateMatrixDescriptor
(
b
.
dims
(),
0
,
trans_b
);
const
auto
&
a_dims
=
a
.
dims
();
const
auto
&
b_dims
=
b
.
dims
();
if
(
a_dims
.
size
()
==
3
&&
b_dims
.
size
()
<=
2
)
{
// if transpose_X is true, the transpose cost much time
if
(
!
context
.
Attr
<
bool
>
(
"transpose_X"
))
{
mat_dim_a
.
height_
*=
mat_dim_a
.
batch_size_
;
mat_dim_a
.
batch_size_
=
0
;
}
else
{
mat_dim_b
.
batch_size_
=
mat_dim_a
.
batch_size_
;
mat_dim_b
.
height_
=
mat_dim_b
.
height_
/
mat_dim_b
.
batch_size_
;
}
}
PADDLE_ENFORCE_EQ
(
mat_dim_a
.
width_
,
mat_dim_b
.
height_
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_grad_op, the "
"first tensor width must be same as second tensor "
"height, but received "
"width:%d, height:%d"
,
mat_dim_a
.
width_
,
mat_dim_b
.
height_
));
PADDLE_ENFORCE_EQ
(
mat_dim_a
.
batch_size_
,
mat_dim_b
.
batch_size_
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_grad_op, the two input"
"tensor batch_size must be same, but received first "
"tensor batch_size:%d, second "
"tensor batch_size:%d"
,
mat_dim_a
.
batch_size_
,
mat_dim_b
.
batch_size_
));
T
alpha
=
static_cast
<
T
>
(
context
.
Attr
<
float
>
(
"alpha"
));
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
float
*
data_c
=
out
->
data
<
T
>
();
int
m
=
mat_dim_a
.
height_
;
int
n
=
mat_dim_b
.
width_
;
int
k
=
mat_dim_a
.
width_
;
int
ldx
=
mat_dim_a
.
trans_
?
m
:
k
;
int
ldy
=
mat_dim_b
.
trans_
?
k
:
n
;
int
ldout
=
n
;
int
batch_size
=
mat_dim_a
.
batch_size_
;
if
(
batch_size
==
0
||
batch_size
==
1
)
{
int
r
=
xpu
::
fc_fusion
<
float
,
float
,
float
,
int16_t
>
(
dev_ctx
.
x_context
(),
a
.
data
<
T
>
(),
b
.
data
<
T
>
(),
data_c
,
m
,
n
,
k
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
,
nullptr
,
nullptr
,
nullptr
,
ldx
,
ldy
,
ldout
,
alpha
,
0
,
nullptr
,
xpu
::
Activation_t
::
LINEAR
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU fc_fusion kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
if
(
std
::
getenv
(
"XPU_PADDLE_MAT_MUL_GRAD_FCINT32"
)
!=
nullptr
)
{
MatMulXPUFunction
<
T
,
int32_t
>
(
&
a
,
&
b
,
out
,
trans_a
,
trans_b
,
context
);
}
else
{
// batch matmul
int
x_stride
=
mat_dim_a
.
stride_
;
int
y_stride
=
mat_dim_b
.
stride_
;
int
out_stride
=
m
*
n
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
const
float
*
x_data
=
a
.
data
<
T
>
()
+
x_stride
*
i
;
const
float
*
y_data
=
b
.
data
<
T
>
()
+
y_stride
*
i
;
float
*
out_data
=
data_c
+
out_stride
*
i
;
int
r
=
xpu
::
fc_fusion
<
float
,
float
,
float
,
int16_t
>
(
dev_ctx
.
x_context
(),
x_data
,
y_data
,
out_data
,
m
,
n
,
k
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
,
nullptr
,
nullptr
,
nullptr
,
ldx
,
ldy
,
ldout
,
alpha
,
0
,
nullptr
,
xpu
::
Activation_t
::
LINEAR
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU fc_fusion kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
MatMulXPUFunction
<
T
,
int16_t
>
(
&
a
,
&
b
,
out
,
trans_a
,
trans_b
,
context
);
}
}
...
...
paddle/fluid/operators/matmul_v2_op_xpu.cc
浏览文件 @
6a3c8725
...
...
@@ -21,211 +21,141 @@
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
void
MatMulXPUFunction
(
const
Tensor
*
X
,
const
Tensor
*
Y
,
const
std
::
vector
<
std
::
int64_t
>&
x_dims
,
const
std
::
vector
<
std
::
int64_t
>&
y_dims
,
Tensor
*
Out
,
bool
trans_x
,
bool
trans_y
,
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
{
const
int
x_ndim
=
x_dims
.
size
();
const
int
y_ndim
=
y_dims
.
size
();
template
<
typename
T
,
typename
FCT
>
static
void
MatMulXPUFunction
(
const
Tensor
*
x
,
const
Tensor
*
y
,
Tensor
*
out
,
bool
trans_x
,
bool
trans_y
,
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
{
const
auto
&
x_dims
=
x
->
dims
();
const
auto
&
y_dims
=
y
->
dims
();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
XPUDeviceContext
>();
// currently only support x_ndim == y_dim and non-broadcast case
PADDLE_ENFORCE_EQ
(
x_ndim
,
y_ndim
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_v2_op"
));
for
(
int
i
=
0
;
i
<
x_ndim
-
2
;
i
++
)
{
PADDLE_ENFORCE_EQ
(
x_dims
.
data
()[
i
],
y_dims
.
data
()[
i
],
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_v2_op"
));
}
int
ret
=
0
;
if
(
x_ndim
==
1
&&
y_ndim
==
1
)
{
PADDLE_ENFORCE_EQ
(
X
->
numel
(),
Y
->
numel
(),
platform
::
errors
::
InvalidArgument
(
"X's numbers is not equal to Y's numbers,"
"when X/Y's dims =1"
));
VLOG
(
3
)
<<
"MatMul's case 1"
;
Out
->
Resize
({
1
});
Out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
ret
=
baidu
::
xpu
::
api
::
fc_int16
(
dev_ctx
.
x_context
(),
false
,
false
,
1
,
1
,
X
->
numel
(),
1.0
f
,
X
->
data
<
T
>
(),
Y
->
data
<
T
>
(),
0.0
f
,
Out
->
data
<
T
>
());
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d] in matmul_v2, please check whether "
"Baidu Kunlun Card is properly installed."
,
ret
));
return
;
}
auto
mat_dim_a
=
math
::
CreateMatrixDescriptor
(
RowMatrixFromVector
(
x_dims
),
0
,
trans_x
);
auto
mat_dim_b
=
math
::
CreateMatrixDescriptor
(
ColumnMatrixFromVector
(
y_dims
),
0
,
trans_y
);
if
(
x_ndim
==
1
)
{
const
int
N
=
X
->
numel
();
if
(
trans_y
)
{
PADDLE_ENFORCE_EQ
(
y_dims
[
y_ndim
-
1
],
N
,
platform
::
errors
::
InvalidArgument
(
"Input(Y) has error dim."
));
if
(
x_dims
.
size
()
==
3
&&
y_dims
.
size
()
<=
2
)
{
// if transpose_X is true, the transpose cost much time
if
(
!
trans_x
)
{
mat_dim_a
.
height_
*=
mat_dim_a
.
batch_size_
;
mat_dim_a
.
batch_size_
=
0
;
}
else
{
PADDLE_ENFORCE_EQ
(
y_dims
[
y_ndim
-
2
],
N
,
platform
::
errors
::
InvalidArgument
(
"Input(Y) has error dim."
));
mat_dim_b
.
batch_size_
=
mat_dim_a
.
batch_size_
;
mat_dim_b
.
height_
=
mat_dim_b
.
height_
/
mat_dim_b
.
batch_size_
;
}
std
::
vector
<
std
::
int64_t
>
out_dims
(
y_ndim
-
1
);
if
(
trans_y
)
{
std
::
copy_n
(
y_dims
.
cbegin
(),
y_ndim
-
1
,
out_dims
.
begin
());
}
else
{
std
::
copy_n
(
y_dims
.
cbegin
(),
y_ndim
-
2
,
out_dims
.
begin
());
out_dims
.
back
()
=
y_dims
.
back
();
}
Out
->
Resize
(
framework
::
make_ddim
(
out_dims
));
Out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
if
(
trans_y
)
{
const
int
M
=
Y
->
numel
()
/
N
;
VLOG
(
3
)
<<
"MatMul's case 2"
;
ret
=
baidu
::
xpu
::
api
::
fc_int16
(
dev_ctx
.
x_context
(),
false
,
true
,
1
,
M
,
N
,
1.0
f
,
X
->
data
<
T
>
(),
Y
->
data
<
T
>
(),
0.0
f
,
Out
->
data
<
T
>
());
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d] in "
"matmul_v2, please check whether "
"Baidu Kunlun Card is properly installed."
,
ret
));
}
else
{
const
int
M
=
y_dims
[
y_ndim
-
1
];
const
int
batch_size
=
Y
->
numel
()
/
(
M
*
N
);
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
ret
=
baidu
::
xpu
::
api
::
fc_int16
(
dev_ctx
.
x_context
(),
false
,
false
,
1
,
M
,
N
,
1.0
f
,
X
->
data
<
T
>
(),
Y
->
data
<
T
>
()
+
i
*
M
*
N
,
0.0
f
,
Out
->
data
<
T
>
()
+
i
*
M
);
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d] in matmul_v2, "
"please check whether "
"Baidu Kunlun Card is properly installed."
,
ret
));
}
}
return
;
}
if
(
y_ndim
==
1
)
{
const
int
N
=
Y
->
numel
();
if
(
trans_x
)
{
PADDLE_ENFORCE_EQ
(
x_dims
[
x_ndim
-
2
],
N
,
platform
::
errors
::
InvalidArgument
(
"Input(X) has error dim."
));
}
else
{
PADDLE_ENFORCE_EQ
(
x_dims
[
x_ndim
-
1
],
N
,
platform
::
errors
::
InvalidArgument
(
"Input(X) has error dim."
));
if
(
mat_dim_a
.
width_
==
mat_dim_b
.
height_
)
{
if
(
mat_dim_a
.
batch_size_
==
0
&&
mat_dim_b
.
batch_size_
==
1
)
{
mat_dim_a
.
batch_size_
=
mat_dim_b
.
batch_size_
=
0
;
}
std
::
vector
<
std
::
int64_t
>
out_dims
(
x_ndim
-
1
);
if
(
trans_x
)
{
std
::
copy_n
(
x_dims
.
cbegin
(),
x_ndim
-
2
,
out_dims
.
begin
());
out_dims
.
back
()
=
x_dims
.
back
();
}
else
{
std
::
copy_n
(
x_dims
.
cbegin
(),
x_ndim
-
1
,
out_dims
.
begin
());
}
Out
->
Resize
(
framework
::
make_ddim
(
out_dims
));
Out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
if
(
trans_x
)
{
const
int
M
=
x_dims
[
x_ndim
-
1
];
const
int
batch_size
=
X
->
numel
()
/
(
M
*
N
);
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
ret
=
baidu
::
xpu
::
api
::
fc_int16
(
dev_ctx
.
x_context
(),
true
,
false
,
M
,
1
,
N
,
1.0
f
,
X
->
data
<
T
>
()
+
i
*
M
*
N
,
Y
->
data
<
T
>
(),
0.0
f
,
Out
->
data
<
T
>
()
+
i
*
M
);
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d] in matmul_v2, "
"please check whether "
"Baidu Kunlun Card is properly installed."
,
ret
));
}
}
else
{
const
int
M
=
X
->
numel
()
/
N
;
VLOG
(
3
)
<<
"MatMul's case 7"
;
ret
=
baidu
::
xpu
::
api
::
fc_int16
(
dev_ctx
.
x_context
(),
false
,
false
,
M
,
1
,
N
,
1.0
f
,
X
->
data
<
T
>
(),
Y
->
data
<
T
>
(),
0.0
f
,
Out
->
data
<
T
>
());
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d] in "
"matmul_v2, please check whether "
"Baidu Kunlun Card is properly installed."
,
ret
));
if
(
mat_dim_a
.
batch_size_
==
1
&&
mat_dim_b
.
batch_size_
==
0
)
{
mat_dim_a
.
batch_size_
=
mat_dim_b
.
batch_size_
=
0
;
}
return
;
}
const
int
M
=
trans_x
?
x_dims
[
x_ndim
-
1
]
:
x_dims
[
x_ndim
-
2
];
const
int
K
=
trans_x
?
x_dims
[
x_ndim
-
2
]
:
x_dims
[
x_ndim
-
1
];
if
(
trans_y
)
{
PADDLE_ENFORCE_EQ
(
y_dims
[
y_ndim
-
1
],
K
,
platform
::
errors
::
InvalidArgument
(
"Input(X) has error dim."
));
PADDLE_ENFORCE_EQ
(
mat_dim_a
.
width_
,
mat_dim_b
.
height_
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_v2_op xdims = %s ydims = %s"
,
x_dims
.
to_str
(),
y_dims
.
to_str
()));
PADDLE_ENFORCE_EQ
(
mat_dim_a
.
batch_size_
,
mat_dim_b
.
batch_size_
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_v2_op xdims = %s ydims = %s"
,
x_dims
.
to_str
(),
y_dims
.
to_str
()));
float
*
data_c
=
out
->
data
<
T
>
();
int
m
=
mat_dim_a
.
height_
;
int
n
=
mat_dim_b
.
width_
;
int
k
=
mat_dim_a
.
width_
;
int
batch_size
=
mat_dim_a
.
batch_size_
;
if
(
batch_size
==
0
)
{
int
r
=
xpu
::
fc
<
float
,
float
,
float
,
FCT
>
(
dev_ctx
.
x_context
(),
x
->
data
<
T
>
(),
y
->
data
<
T
>
(),
data_c
,
m
,
n
,
k
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
,
nullptr
,
nullptr
,
nullptr
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU fc_fusion kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
else
{
PADDLE_ENFORCE_EQ
(
y_dims
[
y_ndim
-
2
],
K
,
platform
::
errors
::
InvalidArgument
(
"Input(X) has error dim."
));
}
const
int
N
=
trans_y
?
y_dims
[
y_ndim
-
2
]
:
y_dims
[
y_ndim
-
1
];
const
int
ndim
=
(
std
::
max
)(
x_ndim
,
y_ndim
);
std
::
vector
<
std
::
int64_t
>
out_broadcast_dims
(
ndim
);
int
batch_size
=
1
;
for
(
int
i
=
0
;
i
<
ndim
-
2
;
i
++
)
{
PADDLE_ENFORCE_EQ
(
x_dims
.
data
()[
i
],
y_dims
.
data
()[
i
],
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_v2_op"
));
out_broadcast_dims
[
i
]
=
x_dims
.
data
()[
i
];
batch_size
*=
x_dims
.
data
()[
i
];
// batch matmul
int
x_stride
=
mat_dim_a
.
stride_
;
int
y_stride
=
mat_dim_b
.
stride_
;
int
out_stride
=
m
*
n
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
const
float
*
x_data
=
x
->
data
<
T
>
()
+
x_stride
*
i
;
const
float
*
y_data
=
y
->
data
<
T
>
()
+
y_stride
*
i
;
float
*
out_data
=
data_c
+
out_stride
*
i
;
int
r
=
xpu
::
fc
<
float
,
float
,
float
,
FCT
>
(
dev_ctx
.
x_context
(),
x_data
,
y_data
,
out_data
,
m
,
n
,
k
,
mat_dim_a
.
trans_
,
mat_dim_b
.
trans_
,
nullptr
,
nullptr
,
nullptr
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU fc_fusion kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
}
out_broadcast_dims
[
ndim
-
2
]
=
M
;
out_broadcast_dims
[
ndim
-
1
]
=
N
;
Out
->
Resize
(
framework
::
make_ddim
(
out_broadcast_dims
));
Out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
ret
=
baidu
::
xpu
::
api
::
batched_gemm_int16
(
dev_ctx
.
x_context
(),
trans_x
,
trans_y
,
batch_size
,
M
,
N
,
K
,
1.0
f
,
X
->
data
<
T
>
(),
Y
->
data
<
T
>
(),
Out
->
data
<
T
>
(),
nullptr
,
nullptr
);
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d] in matmul_v2, please check whether "
"Baidu Kunlun Card is properly installed."
,
ret
));
}
template
<
typename
T
>
class
MatMulV2XPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
X
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
Y
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
auto
*
O
ut
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
auto
*
o
ut
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
bool
trans_x
=
ctx
.
Attr
<
bool
>
(
"trans_x"
);
bool
trans_y
=
ctx
.
Attr
<
bool
>
(
"trans_y"
);
MatMulXPUFunction
<
T
>
(
X
,
Y
,
vectorize
(
X
->
dims
()),
vectorize
(
Y
->
dims
()),
Out
,
trans_x
,
trans_y
,
ctx
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
if
(
std
::
getenv
(
"XPU_PADDLE_MAT_MUL_V2_FCINT32"
)
!=
nullptr
)
{
MatMulXPUFunction
<
T
,
int32_t
>
(
x
,
y
,
out
,
trans_x
,
trans_y
,
ctx
);
}
else
{
MatMulXPUFunction
<
T
,
int16_t
>
(
x
,
y
,
out
,
trans_x
,
trans_y
,
ctx
);
}
}
};
/**
 * For a rank-3 tensor of shape [d0, d1, d2], returns a new tensor holding the
 * data permuted with axes (1, 0, 2) and reshaped to [d1, d0 * d2].
 * Tensors of any other rank are returned unchanged.
 *
 * @tparam DeviceContext  Device context type providing GetPlace() and
 *                        x_context().
 * @tparam T              Element type of the tensor.
 * @param context         Device context used for allocation and the XPU call.
 * @param input           Tensor to fold.
 * @return                The folded tensor, or `input` itself when its rank
 *                        is not 3.
 *
 * Raises External (via PADDLE_ENFORCE_EQ) when xpu::transpose does not return
 * XPU_SUCCESS.
 */
template <typename DeviceContext, typename T>
static framework::Tensor XPUFoldHeadAndLastDims(
    const DeviceContext& context, const framework::Tensor& input) {
  const auto& dims = input.dims();
  if (dims.size() != 3) {
    // Only rank-3 inputs are folded.
    return input;
  }

  // Allocate the destination with the permuted (1, 0, 2) shape.
  framework::Tensor folded;
  folded.Resize({dims[1], dims[0], dims[2]});
  folded.mutable_data<T>(context.GetPlace());

  // Host-side shape and permutation handed to the XPU transpose.
  std::vector<int> src_shape = {static_cast<int>(dims[0]),
                                static_cast<int>(dims[1]),
                                static_cast<int>(dims[2])};
  std::vector<int> permutation = {1, 0, 2};

  const T* src = input.data<T>();
  T* dst = folded.data<T>();
  int status = xpu::transpose(context.x_context(), src, dst, src_shape.data(),
                              permutation.data(), /*ndims=*/3);
  PADDLE_ENFORCE_EQ(status, XPU_SUCCESS,
                    platform::errors::External(
                        "XPU transpose kernel return wrong value[%d %s]",
                        status, XPUAPIErrorMsg[status]));

  // Merge the former head and last dimensions into a single trailing axis.
  folded.Resize({dims[1], dims[0] * dims[2]});
  return folded;
}
template
<
typename
T
>
class
MatMulV2XPUGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
MatMul
(
const
framework
::
ExecutionContext
&
c
ontext
,
void
MatMul
(
const
framework
::
ExecutionContext
&
c
tx
,
const
framework
::
Tensor
&
a
,
bool
trans_a
,
const
framework
::
Tensor
&
b
,
bool
trans_b
,
framework
::
Tensor
*
out
)
const
{
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
MatMulXPUFunction
<
T
>
(
&
a
,
&
b
,
vectorize
(
a
.
dims
()),
vectorize
(
b
.
dims
()),
out
,
trans_a
,
trans_b
,
context
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
if
(
std
::
getenv
(
"XPU_PADDLE_MAT_MUL_GRAD_V2_FCINT32"
)
!=
nullptr
)
{
MatMulXPUFunction
<
T
,
int32_t
>
(
&
a
,
&
b
,
out
,
trans_a
,
trans_b
,
ctx
);
}
else
{
MatMulXPUFunction
<
T
,
int16_t
>
(
&
a
,
&
b
,
out
,
trans_a
,
trans_b
,
ctx
);
}
}
void
CalcInputGrad
(
const
framework
::
ExecutionContext
&
context
,
...
...
@@ -239,118 +169,73 @@ class MatMulV2XPUGradKernel : public framework::OpKernel<T> {
if
(
!
need_combine
)
{
MatMul
(
context
,
a
,
trans_a
,
b
,
trans_b
,
out
);
}
else
{
// currently not support this case
auto
&
dev_ctx
=
context
.
template
device_context
<
paddle
::
platform
::
XPUDeviceContext
>();
MatMul
(
context
,
is_fold_init_dims_a
?
FoldInitDims
(
a
)
:
XPUFoldHeadAndLastDims
<
paddle
::
platform
::
XPUDeviceContext
,
T
>
(
dev_ctx
,
a
),
trans_a
,
is_fold_init_dims_b
?
FoldInitDims
(
b
)
:
XPUFoldHeadAndLastDims
<
paddle
::
platform
::
XPUDeviceContext
,
T
>
(
dev_ctx
,
b
),
trans_b
,
out
);
}
}
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
bool
transpose_x
=
ctx
.
Attr
<
bool
>
(
"trans_x"
);
bool
transpose_y
=
ctx
.
Attr
<
bool
>
(
"trans_y"
);
auto
x
=
*
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
y
=
*
ctx
.
Input
<
framework
::
Tensor
>
(
"Y"
);
auto
dout
=
*
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
// get dims
std
::
vector
<
std
::
int64_t
>
x_dims
=
vectorize
(
x
.
dims
());
std
::
vector
<
std
::
int64_t
>
y_dims
=
vectorize
(
y
.
dims
());
std
::
vector
<
std
::
int64_t
>
dout_dims
=
vectorize
(
dout
.
dims
());
int
x_ndim
=
x_dims
.
size
();
int
y_ndim
=
y_dims
.
size
();
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
XPUDeviceContext
>();
// Case1 : x's or y's dim = 1
int
ret
=
0
;
if
(
x_ndim
==
1
&&
y_ndim
==
1
)
{
if
(
dx
)
{
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
ret
=
baidu
::
xpu
::
api
::
fc_int16
(
dev_ctx
.
x_context
(),
false
,
false
,
dx
->
numel
(),
1
,
1
,
1.0
f
,
y
.
data
<
T
>
(),
dout
.
data
<
T
>
(),
0.0
f
,
dx
->
data
<
T
>
());
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d] in "
"matmul_v2_grad, please check whether "
"Baidu Kunlun Card is properly installed."
,
ret
));
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
bool
transpose_x
=
context
.
Attr
<
bool
>
(
"trans_x"
);
bool
transpose_y
=
context
.
Attr
<
bool
>
(
"trans_y"
);
auto
x
=
*
context
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
y
=
*
context
.
Input
<
framework
::
Tensor
>
(
"Y"
);
auto
dout
=
*
context
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
context
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
context
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
ReshapeXYOutIntoMatrixSequence
(
&
x
,
&
y
,
&
dout
,
transpose_x
,
transpose_y
);
framework
::
DDim
dx_dims
;
if
(
dx
)
{
dx_dims
=
dx
->
dims
();
if
(
dx_dims
!=
x
.
dims
())
{
dx
->
Resize
(
x
.
dims
());
}
if
(
dy
)
{
dy
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
ret
=
baidu
::
xpu
::
api
::
fc_int16
(
dev_ctx
.
x_context
(),
false
,
false
,
dy
->
numel
(),
1
,
1
,
1.0
f
,
x
.
data
<
T
>
(),
dout
.
data
<
T
>
(),
0.0
f
,
dy
->
data
<
T
>
());
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d] in "
"matmul_v2_grad, please check whether "
"Baidu Kunlun Card is properly installed."
,
ret
));
}
framework
::
DDim
dy_dims
;
if
(
dy
)
{
dy_dims
=
dy
->
dims
();
if
(
dy_dims
!=
y
.
dims
())
{
dy
->
Resize
(
y
.
dims
());
}
return
;
}
bool
is_broadcast
=
true
;
if
(
x_ndim
<=
2
||
y_ndim
<=
2
)
{
is_broadcast
=
false
;
}
else
if
(
x_ndim
!=
y_ndim
)
{
is_broadcast
=
true
;
if
(
transpose_x
&&
transpose_y
)
{
CalcInputGrad
(
context
,
y
,
true
,
true
,
dout
,
true
,
false
,
dx
);
CalcInputGrad
(
context
,
dout
,
true
,
true
,
x
,
true
,
false
,
dy
);
}
else
if
(
transpose_x
)
{
CalcInputGrad
(
context
,
y
,
false
,
false
,
dout
,
true
,
false
,
dx
);
CalcInputGrad
(
context
,
x
,
false
,
false
,
dout
,
false
,
true
,
dy
);
}
else
if
(
transpose_y
)
{
CalcInputGrad
(
context
,
dout
,
false
,
false
,
y
,
false
,
true
,
dx
);
CalcInputGrad
(
context
,
dout
,
true
,
true
,
x
,
false
,
true
,
dy
);
}
else
{
is_broadcast
=
!
std
::
equal
(
x_dims
.
cbegin
(),
x_dims
.
cbegin
()
+
x_ndim
-
2
,
y_dims
.
cbegin
()
);
CalcInputGrad
(
context
,
dout
,
false
,
false
,
y
,
true
,
false
,
dx
);
CalcInputGrad
(
context
,
x
,
true
,
true
,
dout
,
false
,
true
,
dy
);
}
// currently only support non-broadcast case
PADDLE_ENFORCE_EQ
(
is_broadcast
,
false
,
platform
::
errors
::
InvalidArgument
(
"Shape mistake in matmul_v2_op"
));
// Case2: no broadcast or no batch size, it aims to speed and it is same as
// matmul in old version.
if
(
!
is_broadcast
)
{
ReshapeXYOutIntoMatrixSequence
(
&
x
,
&
y
,
&
dout
,
transpose_x
,
transpose_y
);
framework
::
DDim
dx_dims
;
if
(
dx
)
{
dx_dims
=
dx
->
dims
();
if
(
dx_dims
!=
x
.
dims
())
{
dx
->
Resize
(
x
.
dims
());
}
}
framework
::
DDim
dy_dims
;
if
(
dy
)
{
dy_dims
=
dy
->
dims
();
if
(
dy_dims
!=
y
.
dims
())
{
dy
->
Resize
(
y
.
dims
());
}
}
if
(
transpose_x
&&
transpose_y
)
{
CalcInputGrad
(
ctx
,
y
,
true
,
true
,
dout
,
true
,
false
,
dx
);
CalcInputGrad
(
ctx
,
dout
,
true
,
true
,
x
,
true
,
false
,
dy
);
}
else
if
(
transpose_x
)
{
CalcInputGrad
(
ctx
,
y
,
false
,
false
,
dout
,
true
,
false
,
dx
);
CalcInputGrad
(
ctx
,
x
,
false
,
false
,
dout
,
false
,
true
,
dy
);
}
else
if
(
transpose_y
)
{
CalcInputGrad
(
ctx
,
dout
,
false
,
false
,
y
,
false
,
true
,
dx
);
CalcInputGrad
(
ctx
,
dout
,
true
,
true
,
x
,
false
,
true
,
dy
);
}
else
{
CalcInputGrad
(
ctx
,
dout
,
false
,
false
,
y
,
true
,
false
,
dx
);
CalcInputGrad
(
ctx
,
x
,
true
,
true
,
dout
,
false
,
true
,
dy
);
if
(
dx
)
{
if
(
dx_dims
!=
x
.
dims
())
{
dx
->
Resize
(
dx_dims
);
}
}
if
(
dx
)
{
if
(
dx_dims
!=
x
.
dims
())
{
dx
->
Resize
(
dx_dims
);
}
}
if
(
dy
)
{
if
(
dy_dims
!=
y
.
dims
())
{
dy
->
Resize
(
dy_dims
);
}
if
(
dy
)
{
if
(
dy_dims
!=
y
.
dims
())
{
dy
->
Resize
(
dy_dims
);
}
}
}
...
...
paddle/fluid/operators/one_hot_op_xpu.cc
浏览文件 @
6a3c8725
...
...
@@ -35,7 +35,7 @@ class OneHotXPUKernel : public framework::OpKernel<T> {
if
(
context
.
HasInput
(
"depth_tensor"
))
{
auto
*
depth_tensor
=
context
.
Input
<
Tensor
>
(
"depth_tensor"
);
auto
*
depth_data
=
depth_tensor
->
data
<
int32_t
>
();
if
(
depth_tensor
->
place
()
==
platform
::
XPUPlace
(
))
{
if
(
platform
::
is_xpu_place
(
depth_tensor
->
place
()
))
{
xpu_memcpy
(
static_cast
<
void
*>
(
&
depth
),
static_cast
<
const
void
*>
(
depth_data
),
sizeof
(
int32_t
),
XPU_DEVICE_TO_HOST
);
...
...
paddle/fluid/operators/one_hot_v2_op_xpu.cc
0 → 100644
浏览文件 @
6a3c8725
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_XPU
#include <string>
#include <vector>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/operators/one_hot_op.h"
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
framework
::
LoDTensor
;
using
Tensor
=
framework
::
Tensor
;
// XPU kernel for the one_hot_v2 operator.
//
// Values read by Compute():
//   "X"            - input tensor of indices with element type T.
//   "depth"        - int attribute used as the one-hot depth by default.
//   "depth_tensor" - optional int32 input; when present, its first element
//                    replaces the "depth" attribute and the last dimension of
//                    "Out" is resized to that value.
// Value written:
//   "Out"          - float tensor filled by xpu::one_hot.
template <typename DeviceContext, typename T>
class OneHotV2XPUKernel : public framework::OpKernel<T> {
 public:
  // Resolves the depth (attribute or "depth_tensor"), then runs xpu::one_hot
  // over all in->numel() elements of "X". Raises an External error if either
  // the device-to-host copy of the depth or the one_hot call does not return
  // XPU_SUCCESS.
  void Compute(const framework::ExecutionContext& context) const override {
    auto* in = context.Input<LoDTensor>("X");
    auto* out = context.Output<LoDTensor>("Out");
    int depth = context.Attr<int>("depth");

    if (context.HasInput("depth_tensor")) {
      auto* depth_tensor = context.Input<Tensor>("depth_tensor");
      auto* depth_data = depth_tensor->data<int32_t>();
      if (platform::is_xpu_place(depth_tensor->place())) {
        // The depth value is on the device: copy it to the host before it is
        // used to size the output. The copy result is checked so a failed
        // transfer cannot silently leave `depth` at the attribute value.
        int copy_ret = xpu_memcpy(static_cast<void*>(&depth),
                                  static_cast<const void*>(depth_data),
                                  sizeof(int32_t), XPU_DEVICE_TO_HOST);
        PADDLE_ENFORCE_EQ(
            copy_ret, XPU_SUCCESS,
            platform::errors::External(
                "XPU xpu_memcpy return wrong value[%d] in one_hot_v2, "
                "please check whether Baidu Kunlun Card is properly "
                "installed.",
                copy_ret));
      } else {
        depth = depth_data[0];
      }

      // The runtime depth decides the size of the last output dimension.
      auto out_dims = out->dims();
      out_dims[out_dims.size() - 1] = depth;
      out->Resize(out_dims);
    }

    auto& dev_ctx = context.template device_context<DeviceContext>();
    int len = in->numel();
    // NOTE(review): 1.0 / 0.0 are presumably the "on" / "off" values written
    // by xpu::one_hot -- confirm against the XPU API header.
    int ret = xpu::one_hot<T>(dev_ctx.x_context(), in->data<T>(),
                              out->mutable_data<float>(context.GetPlace()),
                              len, depth, 1.0, 0.0);
    PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS,
                      platform::errors::External(
                          "XPU one_hot kernel return wrong value[%d %s]", ret,
                          XPUAPIErrorMsg[ret]));
  }
};
}
// namespace operators
}
// namespace paddle
// Register OneHotV2XPUKernel as the XPU kernel of the `one_hot_v2` operator,
// instantiated for XPUDeviceContext with int and int64_t element types.
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_XPU_KERNEL
(
one_hot_v2
,
ops
::
OneHotV2XPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
int
>
,
ops
::
OneHotV2XPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
int64_t
>
);
#endif
paddle/fluid/operators/scale_op_xpu.cc
浏览文件 @
6a3c8725
...
...
@@ -46,10 +46,13 @@ class ScaleXPUKernel : public framework::OpKernel<T> {
in
->
dims
().
to_str
().
c_str
(),
out
->
dims
().
to_str
().
c_str
()));
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
int
r
=
xpu
::
scale
(
dev_ctx
.
x_context
(),
in
->
numel
(),
scale
,
bias
,
bias_after_scale
,
in
->
data
<
float
>
(),
out
->
data
<
float
>
());
PADDLE_ENFORCE_EQ
(
r
,
xpu
::
Error_t
::
SUCCESS
,
platform
::
errors
::
Fatal
(
"XPU scale kernel error!"
));
int
r
=
xpu
::
scale
(
dev_ctx
.
x_context
(),
in
->
data
<
float
>
(),
out
->
data
<
float
>
(),
in
->
numel
(),
bias_after_scale
,
scale
,
bias
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU scale kernel return wrong value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
}
};
...
...
paddle/fluid/operators/softmax_op_xpu.cc
浏览文件 @
6a3c8725
...
...
@@ -41,8 +41,21 @@ class SoftmaxXPUKernel : public framework::OpKernel<T> {
}
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
int
r
=
xpu
::
softmax
<
T
>
(
dev_ctx
.
x_context
(),
x
->
data
<
float
>
(),
out
->
data
<
float
>
(),
x_dims
,
axis
);
int
r
=
XPU_SUCCESS
;
Tensor
clip_x
;
int
len
=
x
->
numel
();
T
*
clip_x_data
=
clip_x
.
mutable_data
<
T
>
(
platform
::
XPUPlace
(),
len
*
sizeof
(
T
));
r
=
xpu
::
clip
(
dev_ctx
.
x_context
(),
x
->
data
<
float
>
(),
clip_x_data
,
len
,
-
1e30
,
1e30
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API(clip) return wrong "
"value[%d %s]"
,
r
,
XPUAPIErrorMsg
[
r
]));
r
=
xpu
::
softmax
<
T
>
(
dev_ctx
.
x_context
(),
clip_x_data
,
out
->
data
<
float
>
(),
x_dims
,
axis
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API(softmax2d_forward) return wrong "
...
...
python/paddle/fluid/tests/unittests/xpu/test_matmul_v2_op_xpu.py
浏览文件 @
6a3c8725
...
...
@@ -13,12 +13,11 @@
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
sys
sys
.
path
.
append
(
".."
)
from
op_test
import
OpTest
import
unittest
import
numpy
as
np
from
op_test_xpu
import
XPUOpTest
import
paddle.fluid.core
as
core
import
paddle
...
...
@@ -57,9 +56,7 @@ def reference_matmul(X, Y, transpose_X=False, transpose_Y=False):
return
Out
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_xpu
(),
"core is not compiled with XPU"
)
class
TestMatMulV2Op
(
OpTest
):
class
TestMatMulV2Op
(
XPUOpTest
):
"""
case 1
"""
...
...
@@ -74,10 +71,10 @@ class TestMatMulV2Op(OpTest):
self
.
dtype
=
"float32"
def
setUp
(
self
):
self
.
use_xpu
=
True
self
.
init_kernel_type
()
self
.
config
()
self
.
op_type
=
"matmul_v2"
self
.
use_xpu
=
True
x
=
np
.
random
.
random
(
self
.
x_shape
).
astype
(
self
.
dtype
)
y
=
np
.
random
.
random
(
self
.
y_shape
).
astype
(
self
.
dtype
)
# -0.1 ~ 0.1
...
...
@@ -94,31 +91,25 @@ class TestMatMulV2Op(OpTest):
def
test_check_output
(
self
):
place
=
paddle
.
XPUPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
0.01
)
self
.
check_output_with_place
(
place
)
def
test_check_grad
(
self
):
place
=
paddle
.
XPUPlace
(
0
)
self
.
check_grad_with_place
(
place
,
[
'X'
,
'Y'
],
'Out'
,
max_relative_error
=
0.1
)
self
.
check_grad_with_place
(
place
,
[
'X'
,
'Y'
],
'Out'
)
'''
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestMatMuklOp2(TestMatMulV2Op):
"""
case 2
"""
# class TestMatMuklOp2(TestMatMulV2Op):
# """
# case 2
# """
def config(self):
self.x_shape = (100, )
self.y_shape = (1, 3, 2, 100)
self.trans_x = False
self.trans_y = True
#
def config(self):
#
self.x_shape = (100, )
#
self.y_shape = (1, 3, 2, 100)
#
self.trans_x = False
#
self.trans_y = True
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class
TestMatMuklOp3
(
TestMatMulV2Op
):
"""
case 3
...
...
@@ -131,21 +122,18 @@ class TestMatMuklOp3(TestMatMulV2Op):
self
.
trans_y
=
False
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestMatMuklOp4(TestMatMulV2Op):
"""
case 4
"""
# class TestMatMuklOp4(TestMatMulV2Op):
# """
# case 4
# """
# def config(self):
# self.x_shape = (100, )
# self.y_shape = (1, 2, 100, 2)
# self.trans_x = False
# self.trans_y = False
def config(self):
self.x_shape = (100, )
self.y_shape = (1, 2, 100, 2)
self.trans_x = False
self.trans_y = False
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class
TestMatMuklOp5
(
TestMatMulV2Op
):
"""
case 5
...
...
@@ -158,37 +146,29 @@ class TestMatMuklOp5(TestMatMulV2Op):
self
.
trans_y
=
False
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestMatMuklOp6(TestMatMulV2Op):
"""
case 6
"""
def config(self):
self.x_shape = (1, 2, 100, 1)
self.y_shape = (100, )
self.trans_x = True
self.trans_y = False
# class TestMatMuklOp6(TestMatMulV2Op):
# """
# case 6
# """
# def config(self):
# self.x_shape = (1, 2, 102, 1)
# self.y_shape = (102, )
# self.trans_x = True
# self.trans_y = False
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestMatMuklOp7(TestMatMulV2Op):
"""
case 7
"""
# class TestMatMuklOp7(TestMatMulV2Op):
# """
# case 7
# """
def config(self):
self.x_shape = (1, 2, 1, 100)
self.y_shape = (100, )
self.trans_x = False
self.trans_y = False
'''
# def config(self):
# self.x_shape = (1, 2, 1, 100)
# self.y_shape = (100, )
# self.trans_x = False
# self.trans_y = False
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_xpu
(),
"core is not compiled with XPU"
)
class
TestMatMuklOp8
(
TestMatMulV2Op
):
"""
case 8
...
...
@@ -201,37 +181,97 @@ class TestMatMuklOp8(TestMatMulV2Op):
self
.
trans_y
=
False
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_xpu
(),
"core is not compiled with XPU"
)
# class TestMatMuklOp9(TestMatMulV2Op):
# """
# case 9
# """
# def config(self):
# self.x_shape = (1, 1, 1, 100)
# self.y_shape = (2, 1, 2, 100)
# self.trans_x = False
# self.trans_y = True
# class TestMatMuklOp10(TestMatMulV2Op):
# """
# case 10
# """
# def config(self):
# self.x_shape = (1, 1, 25, 4)
# self.y_shape = (1, 2, 4, 25)
# self.trans_x = False
# self.trans_y = False
# class TestMatMuklOp11(TestMatMulV2Op):
# """
# case 11
# """
# def config(self):
# self.x_shape = (2, 1, 2, 100)
# self.y_shape = (1, 1, 100, 2)
# self.trans_x = False
# self.trans_y = False
# class TestMatMuklOp12(TestMatMulV2Op):
# """
# case 12
# """
# def config(self):
# self.x_shape = (2, 1, 4, 25)
# self.y_shape = (1, 1, 4, 25)
# self.trans_x = True
# self.trans_y = False
class
TestMatMuklOp13
(
TestMatMulV2Op
):
"""
case 13
"""
def
config
(
self
):
self
.
x_shape
=
(
2
,
2
,
2
,
5
0
)
self
.
y_shape
=
(
2
,
2
,
2
,
5
0
)
self
.
x_shape
=
(
2
,
2
,
10
,
1
0
)
self
.
y_shape
=
(
2
,
2
,
10
,
1
0
)
self
.
trans_x
=
True
self
.
trans_y
=
False
'''
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestMatMuklOp16(TestMatMulV2Op):
"""
case 16 : to check the gradient for special case
"""
# class TestMatMuklOp14(TestMatMulV2Op):
# """
# case 14_1
# """
def config(self):
self.x_shape = (100)
self.y_shape = (1, 2, 2, 100, 2)
self.trans_x = False
self.trans_y = False
# def config(self):
# self.x_shape = (3, 1, 6, 6)
# self.y_shape = (1, 2, 6, 9)
# self.trans_x = True
# self.trans_y = False
# class TestMatMuklOp15(TestMatMulV2Op):
# """
# case 14_2
# """
# def config(self):
# self.x_shape = (3, 1, 6, 6)
# self.y_shape = (1, 2, 6, 9)
# self.trans_x = False
# self.trans_y = False
# class TestMatMuklOp16(TestMatMulV2Op):
# """
# case 16 : to check the gradient for special case
# """
# def config(self):
# self.x_shape = (100)
# self.y_shape = (1, 2, 2, 100, 2)
# self.trans_x = False
# self.trans_y = False
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class
TestMatMuklOp17
(
TestMatMulV2Op
):
"""
case 17 : to check the gradient for special case
...
...
@@ -242,36 +282,30 @@ class TestMatMuklOp17(TestMatMulV2Op):
self
.
y_shape
=
(
100
)
self
.
trans_x
=
False
self
.
trans_y
=
False
'''
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_xpu
(),
"core is not compiled with XPU"
)
class
TestMatMulV2API
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
places
=
[
fluid
.
CPUPlace
()]
self
.
places
.
append
(
fluid
.
XPUPlace
(
0
))
def
check_static_result
(
self
,
place
):
with
fluid
.
program_guard
(
fluid
.
Program
(),
fluid
.
Program
()):
input_x
=
fluid
.
data
(
name
=
"input_x"
,
shape
=
[
4
,
3
],
dtype
=
"float32"
)
input_y
=
fluid
.
data
(
name
=
"input_y"
,
shape
=
[
3
,
4
],
dtype
=
"float32"
)
result
=
paddle
.
matmul
(
input_x
,
input_y
)
x_np
=
np
.
random
.
random
([
4
,
3
]).
astype
(
"float32"
)
y_np
=
np
.
random
.
random
([
3
,
4
]).
astype
(
"float32"
)
# class TestMatMuklOpBroadcast1(TestMatMulV2Op):
# """
# case 14_3
# """
exe
=
fluid
.
Executor
(
place
)
fetches
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
{
"input_x"
:
x_np
,
"input_y"
:
y_np
},
fetch_list
=
[
result
])
# def config(self):
# self.x_shape = (3, 1, 10, 10)
# self.y_shape = (1, 2, 10, 10)
# self.trans_x = True
# self.trans_y = True
def
test_static
(
self
):
for
place
in
self
.
places
:
self
.
check_static_result
(
place
=
place
)
# class TestMatMuklOpBroadcast2(TestMatMulV2Op):
# """
# case 14_4
# """
# def config(self):
# self.x_shape = (3, 1, 10, 10)
# self.y_shape = (1, 2, 10, 10)
# self.trans_x = False
# self.trans_y = True
if
__name__
==
"__main__"
:
paddle
.
enable_static
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/xpu/test_one_hot_v2_op_xpu.py
0 → 100644
浏览文件 @
6a3c8725
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
paddle
import
paddle.fluid.core
as
core
import
sys
sys
.
path
.
append
(
".."
)
from
op_test_xpu
import
XPUOpTest
import
paddle.fluid
as
fluid
from
paddle.fluid
import
Program
,
program_guard
import
time
paddle
.
enable_static
()
class TestOneHotOp(XPUOpTest):
    """one_hot_v2 on XPU with the depth supplied through ``depth_tensor``.

    The input is a 1-D int32 tensor; the expected output is a float32 matrix
    with one row per input element and ``depth`` columns.
    """

    def setUp(self):
        self.use_xpu = True
        self.op_type = 'one_hot_v2'
        depth = 10
        depth_np = np.array(10).astype('int32')
        x_lod = [[4, 1, 3, 3]]
        # randint's upper bound is exclusive, so indices lie in [0, depth - 2].
        x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
        x = np.array(x).astype('int32').reshape([sum(x_lod[0])])

        # np.prod replaces np.product, a deprecated alias removed in NumPy 2.0.
        out = np.zeros(shape=(np.prod(x.shape), depth)).astype('float32')

        for i in range(np.prod(x.shape)):
            out[i, x[i]] = 1.0

        self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np}
        self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)}
        self.outputs = {'Out': (out, x_lod)}

    def test_check_output(self):
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place, check_dygraph=False)
class TestOneHotOp_attr(XPUOpTest):
    """one_hot_v2 on XPU with the depth supplied through the ``depth`` attr.

    The input has shape (N, 1); the expected output has shape (N, 1, depth).
    """

    def setUp(self):
        self.op_type = 'one_hot_v2'
        depth = 10
        x_lod = [[4, 1, 3, 3]]
        # randint's upper bound is exclusive, so indices lie in [0, depth - 2].
        x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
        x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1])

        # np.prod replaces np.product, a deprecated alias removed in NumPy 2.0.
        out = np.zeros(shape=(np.prod(x.shape[:-1]), 1,
                              depth)).astype('float32')

        for i in range(np.prod(x.shape)):
            out[i, 0, x[i]] = 1.0

        self.inputs = {'X': (x, x_lod)}
        self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth}
        self.outputs = {'Out': (out, x_lod)}

    def test_check_output(self):
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place, check_dygraph=False)
class TestOneHotOp_default_dtype(XPUOpTest):
    """one_hot_v2 on XPU with ``depth_tensor`` and no attributes set.

    ``self.attrs`` is left empty, so the operator runs with its default
    attribute values; the expected output is float32.
    """

    def setUp(self):
        self.op_type = 'one_hot_v2'
        depth = 10
        depth_np = np.array(10).astype('int32')
        x_lod = [[4, 1, 3, 3]]
        # randint's upper bound is exclusive, so indices lie in [0, depth - 2].
        x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
        x = np.array(x).astype('int32').reshape([sum(x_lod[0])])

        # np.prod replaces np.product, a deprecated alias removed in NumPy 2.0.
        out = np.zeros(shape=(np.prod(x.shape), depth)).astype('float32')

        for i in range(np.prod(x.shape)):
            out[i, x[i]] = 1.0

        self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np}
        self.attrs = {}
        self.outputs = {'Out': (out, x_lod)}

    def test_check_output(self):
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place, check_dygraph=False)
class TestOneHotOp_default_dtype_attr(XPUOpTest):
    """one_hot_v2 on XPU with only the ``depth`` attribute set.

    No ``dtype`` attribute is passed, so the operator's default is used. The
    input has shape (N, 1); the expected output has shape (N, 1, depth).
    """

    def setUp(self):
        self.op_type = 'one_hot_v2'
        depth = 10
        x_lod = [[4, 1, 3, 3]]
        # randint's upper bound is exclusive, so indices lie in [0, depth - 2].
        x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
        x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1])

        # np.prod replaces np.product, a deprecated alias removed in NumPy 2.0.
        out = np.zeros(shape=(np.prod(x.shape[:-1]), 1,
                              depth)).astype('float32')

        for i in range(np.prod(x.shape)):
            out[i, 0, x[i]] = 1.0

        self.inputs = {'X': (x, x_lod)}
        self.attrs = {'depth': depth}
        self.outputs = {'Out': (out, x_lod)}

    def test_check_output(self):
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place, check_dygraph=False)
class TestOneHotOp_out_of_range(XPUOpTest):
    """one_hot_v2 on XPU with ``allow_out_of_range`` enabled.

    Every index is either -1 or ``depth``, i.e. outside [0, depth - 1], and
    the expected output is an all-zero float32 matrix.
    """

    def setUp(self):
        self.op_type = 'one_hot_v2'
        depth = 10
        x_lod = [[4, 1, 3, 3]]
        x = [np.random.choice([-1, depth]) for i in range(sum(x_lod[0]))]
        x = np.array(x).astype('int32').reshape([sum(x_lod[0])])

        # np.prod replaces np.product, a deprecated alias removed in NumPy 2.0.
        out = np.zeros(shape=(np.prod(x.shape), depth)).astype('float32')

        self.inputs = {'X': (x, x_lod)}
        self.attrs = {'depth': depth, 'allow_out_of_range': True}
        self.outputs = {'Out': (out, x_lod)}

    def test_check_output(self):
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place, check_dygraph=False)
class TestOneHotOpApi(unittest.TestCase):
    """Smoke tests for the ``fluid.one_hot`` Python API on an XPU place.

    The tests only build and execute the op; they make no assertions about
    the returned values.
    """

    def test_api(self):
        # Depth given as a plain Python int.
        self._run(10)

    def test_api_with_depthTensor(self):
        # Depth given as a tensor produced by an assign op.
        depth_values = np.array([10], dtype=np.int32)
        self._run(fluid.layers.assign(input=depth_values))

    def test_api_with_dygraph(self):
        depth = 10
        samples = [np.random.randint(0, depth - 1) for _ in range(6)]
        label = np.array(samples).reshape([6, 1])
        with fluid.dygraph.guard():
            label_var = fluid.dygraph.to_variable(label)
            one_hot_label = fluid.one_hot(input=label_var, depth=depth)

    def _run(self, depth):
        """Build a one_hot graph on an int64 ``label`` input and execute it."""
        label = fluid.layers.data(name="label", shape=[1], dtype="int64")
        one_hot_label = fluid.one_hot(input=label, depth=depth)

        place = fluid.XPUPlace(0)
        samples = [np.random.randint(0, 10 - 1) for _ in range(6)]
        label_data = np.array(samples).reshape([6, 1])

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        feed = {'label': label_data}
        ret = exe.run(feed=feed,
                      fetch_list=[one_hot_label],
                      return_numpy=False)
class BadInputTestOnehotV2(unittest.TestCase):
    """Checks that ``fluid.one_hot`` rejects a float32 input."""

    def test_error(self):
        with fluid.program_guard(fluid.Program()):
            # Building one_hot on a float32 label is expected to raise
            # TypeError.
            with self.assertRaises(TypeError):
                float_label = fluid.layers.data(
                    name="label",
                    shape=[4],
                    append_batch_size=False,
                    dtype="float32")
                fluid.one_hot(input=float_label, depth=4)
# When run as a script: switch Paddle to static-graph mode, then run all tests.
if
__name__
==
'__main__'
:
paddle
.
enable_static
()
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录