Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
60bec700
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
60bec700
编写于
4月 19, 2022
作者:
z8hanghuan
提交者:
GitHub
4月 19, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support bmm&bmm_grad for KL2, *test=kunlun (#41935)
上级
4f461ab9
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
317 additions
and
0 deletions
+317
-0
paddle/fluid/operators/bmm_op_xpu.cc
paddle/fluid/operators/bmm_op_xpu.cc
+211
-0
paddle/fluid/platform/device/xpu/xpu2_op_list.h
paddle/fluid/platform/device/xpu/xpu2_op_list.h
+2
-0
python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py
python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py
+104
-0
未找到文件。
paddle/fluid/operators/bmm_op_xpu.cc
0 → 100644
浏览文件 @
60bec700
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_XPU
#include <string>
#include <vector>
#include "paddle/fluid/operators/matmul_v2_op.h"
#include "paddle/fluid/operators/xpu_api_wrapper.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
namespace
paddle
{
namespace
operators
{
// Computes the batched matrix product out = op(x) * op(y) by calling
// xpu::fc_batched, where op() applies the transpose requested through
// trans_x / trans_y.
//
// Template parameters:
//   T   - element type of the tensors; the device-side type is obtained from
//         XPUTypeTrait<T>::Type.
//   FCT - forwarded unchanged as the fourth template argument of
//         xpu::fc_batched.
//
// Parameters:
//   x, y    - operands; their dims are turned into matrix descriptors.
//   out     - destination tensor. Its buffer is read with out->data<T>();
//             this function does not allocate it.
//   trans_x - whether x is treated as transposed.
//   trans_y - whether y is treated as transposed.
//   ctx     - execution context providing the XPUDeviceContext.
//
// The status returned by fc_batched is checked with
// PADDLE_ENFORCE_XDNN_SUCCESS.
template <typename T, typename FCT>
static void MatMulXPUFunction(const Tensor* x, const Tensor* y, Tensor* out,
                              bool trans_x, bool trans_y,
                              const paddle::framework::ExecutionContext& ctx) {
  using XPUType = typename XPUTypeTrait<T>::Type;

  auto& xpu_dev_ctx =
      ctx.template device_context<paddle::platform::XPUDeviceContext>();

  // Describe both operands as (batches of) matrices.
  const auto& lhs_shape = x->dims();
  const auto& rhs_shape = y->dims();
  auto lhs_desc = phi::funcs::CreateMatrixDescriptor(
      RowMatrixFromVector(lhs_shape), 0, trans_x);
  auto rhs_desc = phi::funcs::CreateMatrixDescriptor(
      ColumnMatrixFromVector(rhs_shape), 0, trans_y);

  // Device-typed views of the output and input buffers.
  XPUType* out_ptr = reinterpret_cast<XPUType*>(out->data<T>());
  const XPUType* lhs_ptr = reinterpret_cast<const XPUType*>(x->data<T>());
  const XPUType* rhs_ptr = reinterpret_cast<const XPUType*>(y->data<T>());

  // Problem sizes taken from the descriptors.
  int rows = lhs_desc.height_;
  int cols = rhs_desc.width_;
  int inner = lhs_desc.width_;
  int batch_count = lhs_desc.batch_size_;

  int status = xpu::fc_batched<XPUType, XPUType, XPUType, FCT>(
      xpu_dev_ctx.x_context(),  // Context* ctx
      batch_count,              // int batch_size
      lhs_desc.trans_,          // bool x_trans
      rhs_desc.trans_,          // bool w_trans
      rows,                     // int m
      cols,                     // int n
      inner,                    // int k
      1.0,                      // float alpha
      lhs_ptr,                  // const TX* x
      lhs_desc.stride_,         // int stride_a
      rhs_ptr,                  // const TW* w
      rhs_desc.stride_,         // int stride_b
      0.0,                      // float beta
      out_ptr,                  // TY* y
      rows * cols,              // int stride_c
      nullptr,                  // const float* x_maxptr
      nullptr);                 // const float* w_maxptr
  PADDLE_ENFORCE_XDNN_SUCCESS(status, "fc_batched");
}
// XPU forward kernel for the bmm operator: computes Out = X * Y through
// MatMulXPUFunction with neither operand transposed.
template <typename T>
class BmmXPUKernel : public framework::OpKernel<T> {
 public:
  // Reads inputs "X" and "Y", allocates output "Out" on the context's place
  // and fills it with the batched product.
  //
  // Raises InvalidArgument (via PADDLE_ENFORCE_EQ) when X or Y is not
  // 3-dimensional, when their first dimensions differ, or when X's last
  // dimension differs from Y's middle dimension.
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    const auto* lhs = ctx.Input<Tensor>("X");
    const auto* rhs = ctx.Input<Tensor>("Y");
    auto* result = ctx.Output<Tensor>("Out");
    result->mutable_data<T>(ctx.GetPlace());

    // Nothing is computed when either operand has no elements.
    // NOTE(review): on this path Out has been allocated but is not written
    // here, and the shape checks below are skipped - confirm this is the
    // intended behaviour when Out itself is non-empty.
    if (lhs->numel() == 0 || rhs->numel() == 0) {
      return;
    }

    const auto lhs_shape = lhs->dims();
    const auto rhs_shape = rhs->dims();

    PADDLE_ENFORCE_EQ(lhs_shape.size(), 3,
                      platform::errors::InvalidArgument(
                          "Input(X) of BmmOp must be 3-dimensional in BmmOp, "
                          "but received X's shape: [%s].",
                          lhs_shape));
    PADDLE_ENFORCE_EQ(rhs_shape.size(), 3,
                      platform::errors::InvalidArgument(
                          "Input(Y) of BmmOp must be 3-dimensional in BmmOp, "
                          "but received Y's shape: [%s].",
                          rhs_shape));
    PADDLE_ENFORCE_EQ(
        lhs_shape[0], rhs_shape[0],
        platform::errors::InvalidArgument(
            "Input(X) and Input(Y) must have the same batch size in BmmOp, "
            "but received X's batch size: [%s],"
            "Y's batch size [%s]",
            lhs_shape[0], rhs_shape[0]));
    PADDLE_ENFORCE_EQ(
        lhs_shape[2], rhs_shape[1],
        platform::errors::InvalidArgument(
            "Input(X)'s width must be equal with Input(Y)'s height in BmmOp,"
            "but receive X's width: [%s],"
            "Y's height: [%s].",
            lhs_shape[2], rhs_shape[1]));

    // bmm never transposes its operands.
    constexpr bool kNoTranspose = false;

    // Select the FCT template argument. float16 always uses int16_t and does
    // not consult the environment; other types honour XPU_PADDLE_FC_INT32
    // first, then XPU_PADDLE_FC_LOCAL_INT16, and otherwise use int16_t too.
    const bool is_fp16 = std::is_same<paddle::platform::float16, T>::value;
    if (!is_fp16 && std::getenv("XPU_PADDLE_FC_INT32") != nullptr) {
      MatMulXPUFunction<T, int32_t>(lhs, rhs, result, kNoTranspose,
                                    kNoTranspose, ctx);
    } else if (!is_fp16 &&
               std::getenv("XPU_PADDLE_FC_LOCAL_INT16") != nullptr) {
      MatMulXPUFunction<T, float>(lhs, rhs, result, kNoTranspose, kNoTranspose,
                                  ctx);
    } else {
      MatMulXPUFunction<T, int16_t>(lhs, rhs, result, kNoTranspose,
                                    kNoTranspose, ctx);
    }
  }
};
// XPU backward kernel for the bmm operator. Given X, Y and the gradient of
// Out, it produces the gradients of X and Y (each one only if requested).
template <typename T>
class BmmXPUGradKernel : public framework::OpKernel<T> {
 public:
  // Allocates `out` on the context's place and writes op(a) * op(b) into it
  // through MatMulXPUFunction, where op() is the transpose selected by
  // trans_a / trans_b.
  //
  // The FCT template argument is chosen as follows: float16 always uses
  // int16_t without consulting the environment; other types honour
  // XPU_PADDLE_FC_INT32 first, then XPU_PADDLE_FC_LOCAL_INT16, and otherwise
  // use int16_t as well.
  void MatMul(const framework::ExecutionContext& ctx,
              const framework::Tensor& a, bool trans_a,
              const framework::Tensor& b, bool trans_b,
              framework::Tensor* out) const {
    out->mutable_data<T>(ctx.GetPlace());

    const bool is_fp16 = std::is_same<paddle::platform::float16, T>::value;
    if (!is_fp16 && std::getenv("XPU_PADDLE_FC_INT32") != nullptr) {
      MatMulXPUFunction<T, int32_t>(&a, &b, out, trans_a, trans_b, ctx);
    } else if (!is_fp16 &&
               std::getenv("XPU_PADDLE_FC_LOCAL_INT16") != nullptr) {
      MatMulXPUFunction<T, float>(&a, &b, out, trans_a, trans_b, ctx);
    } else {
      MatMulXPUFunction<T, int16_t>(&a, &b, out, trans_a, trans_b, ctx);
    }
  }

  // Computes one input gradient with MatMul; does nothing when `out` is null
  // (i.e. that gradient was not requested).
  void CalcInputGrad(const framework::ExecutionContext& context,
                     const framework::Tensor& a, bool trans_a,
                     const framework::Tensor& b, bool trans_b,
                     framework::Tensor* out) const {
    if (out != nullptr) {
      MatMul(context, a, trans_a, b, trans_b, out);
    }
  }

  // Reads "X", "Y" and the gradient of "Out", then computes
  //   dX = dOut * Y^T   and   dY = X^T * dOut
  // for whichever of the two gradient outputs is present.
  void Compute(const framework::ExecutionContext& context) const override {
    // Local Tensor objects copied from the inputs; they are handed by pointer
    // to ReshapeXYOutIntoMatrixSequence below.
    framework::Tensor x = *context.Input<framework::Tensor>("X");
    framework::Tensor y = *context.Input<framework::Tensor>("Y");
    framework::Tensor dout =
        *context.Input<framework::Tensor>(framework::GradVarName("Out"));

    framework::Tensor* dx =
        context.Output<framework::Tensor>(framework::GradVarName("X"));
    framework::Tensor* dy =
        context.Output<framework::Tensor>(framework::GradVarName("Y"));

    ReshapeXYOutIntoMatrixSequence(&x, &y, &dout, false, false);

    // Remember the original gradient shapes and temporarily give each
    // gradient the shape of its (reshaped) input.
    framework::DDim saved_dx_dims;
    if (dx != nullptr) {
      saved_dx_dims = dx->dims();
      if (saved_dx_dims != x.dims()) {
        dx->Resize(x.dims());
      }
    }

    framework::DDim saved_dy_dims;
    if (dy != nullptr) {
      saved_dy_dims = dy->dims();
      if (saved_dy_dims != y.dims()) {
        dy->Resize(y.dims());
      }
    }

    // dX = dOut * Y^T
    CalcInputGrad(context, dout, false, y, true, dx);
    // dY = X^T * dOut
    CalcInputGrad(context, x, true, dout, false, dy);

    // Restore the gradient shapes recorded above.
    if (dx != nullptr && saved_dx_dims != x.dims()) {
      dx->Resize(saved_dx_dims);
    }
    if (dy != nullptr && saved_dy_dims != y.dims()) {
      dy->Resize(saved_dy_dims);
    }
  }
};
}
// namespace operators
}
// namespace paddle
// Short aliases used by the kernel registrations below.
namespace plat = paddle::platform;
namespace ops = paddle::operators;

// Forward kernels of the bmm operator for float and float16.
REGISTER_OP_XPU_KERNEL(bmm, ops::BmmXPUKernel<float>,
                       ops::BmmXPUKernel<plat::float16>);

// Backward kernels of the bmm operator for float and float16.
REGISTER_OP_XPU_KERNEL(bmm_grad, ops::BmmXPUGradKernel<float>,
                       ops::BmmXPUGradKernel<plat::float16>);
#endif
paddle/fluid/platform/device/xpu/xpu2_op_list.h
浏览文件 @
60bec700
...
...
@@ -43,6 +43,8 @@ XPUOpMap& get_kl2_ops() {
{
"batch_norm_grad"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"batch_norm"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"bmm"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"bmm_grad"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"bce_loss_grad"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
{
"bce_loss"
,
XPUKernelSet
({
pOpKernelType
(
vartype
::
FP32
,
XPUPlace
())})},
...
...
python/paddle/fluid/tests/unittests/xpu/test_bmm_op_xpu.py
0 → 100644
浏览文件 @
60bec700
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
sys
sys
.
path
.
append
(
".."
)
import
paddle
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
import
paddle.tensor
as
tensor
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
from
op_test_xpu
import
XPUOpTest
from
paddle.fluid.framework
import
Program
,
program_guard
from
xpu.get_test_cover_info
import
create_test_class
,
get_xpu_op_support_types
,
XPUOpTestWrapper
paddle
.
enable_static
()
class XPUTestBmmOp(XPUOpTestWrapper):
    """Wrapper that groups the XPU test cases of the ``bmm`` operator.

    Operator description:
    https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/bmm_cn.html#bmm
    """

    def __init__(self):
        self.op_name = 'bmm'
        self.use_dynamic_create_class = False

    class TestBmmOp(XPUOpTest):
        """Base case: random X and Y, expected output from ``np.matmul``."""

        def setUp(self):
            # Order matters: set_xpu() reads self.in_type and writes a
            # class-level op_type, which the instance attribute set right
            # after it shadows.
            self.init_dtype()
            self.set_xpu()
            self.op_type = "bmm"
            self.place = paddle.XPUPlace(0)
            self.set_shape()

            # X is drawn before Y so the random stream is consumed in a
            # fixed order.
            x_data = np.random.random(self.Xshape).astype(self.dtype)
            y_data = np.random.random(self.Yshape).astype(self.dtype)
            expected = np.matmul(x_data, y_data)

            self.inputs = {'X': x_data, 'Y': y_data}
            self.outputs = {'Out': expected}

        def init_dtype(self):
            """Use the dtype under test (``self.in_type``) for the data."""
            self.dtype = self.in_type

        def set_shape(self):
            """Operand shapes; overridden by the subclasses below."""
            self.Xshape, self.Yshape = (10, 3, 4), (10, 4, 5)

        def set_xpu(self):
            """Set the class-level flags used for this XPU test."""
            test_cls = self.__class__
            test_cls.use_xpu = True
            test_cls.no_need_check_grad = False
            test_cls.op_type = self.in_type

        def test_check_output(self):
            self.check_output_with_place(self.place)

        def test_check_grad_normal(self):
            self.check_grad_with_place(self.place, ['X', 'Y'], 'Out')

    class TestBmmOp1(TestBmmOp):
        """Small cubic operands."""

        def set_shape(self):
            self.Xshape, self.Yshape = (3, 3, 3), (3, 3, 3)

    class TestBmmOp2(TestBmmOp):
        """Batch of 128 with a 3x3 result per batch."""

        def set_shape(self):
            self.Xshape, self.Yshape = (128, 3, 16), (128, 16, 3)

    class TestBmmOp3(TestBmmOp):
        """Batch of 2048 with a 16x16 result per batch."""

        def set_shape(self):
            self.Xshape, self.Yshape = (2048, 16, 27), (2048, 27, 16)

    class TestBmmOp4(TestBmmOp):
        """Batch of 2 with square 27x27 operands."""

        def set_shape(self):
            self.Xshape, self.Yshape = (2, 27, 27), (2, 27, 27)

    class TestBmmOp5(TestBmmOp):
        """Batch of 2 with 1x1 operands."""

        def set_shape(self):
            self.Xshape, self.Yshape = (2, 1, 1), (2, 1, 1)
# Create the concrete test classes: one call to create_test_class per type
# returned by get_xpu_op_support_types('bmm').
support_types = get_xpu_op_support_types('bmm')
for stype in support_types:
    create_test_class(globals(), XPUTestBmmOp, stype)

# Run the tests when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录