Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
657c69bc
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
657c69bc
编写于
8月 31, 2022
作者:
W
Wilber
提交者:
GitHub
8月 31, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix fused cuda op's mutable data [3] (#45564)
上级
9034ca70
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
15 additions
and
8 deletions
+15
-8
paddle/fluid/operators/fused/fusion_group_op.h
paddle/fluid/operators/fused/fusion_group_op.h
+10
-4
paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc
.../operators/fused/fusion_transpose_flatten_concat_op.cu.cc
+2
-2
paddle/fluid/operators/fused/skip_layernorm_op.cu
paddle/fluid/operators/fused/skip_layernorm_op.cu
+2
-1
paddle/fluid/operators/fused/yolo_box_head_op.cu
paddle/fluid/operators/fused/yolo_box_head_op.cu
+1
-1
未找到文件。
paddle/fluid/operators/fused/fusion_group_op.h
浏览文件 @
657c69bc
...
...
@@ -23,17 +23,22 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
/**
 * Allocates storage for every tensor in `var`, choosing the element type from
 * the entry of `data_type` at the same index.
 *
 * Only FP32, FP16 and FP64 are handled; a tensor whose `data_type` entry is
 * anything else is left untouched. The requested size is
 * `numel() * sizeof(element type)` for each tensor.
 *
 * Allocation goes exclusively through `dev_ctx.Alloc<T>()`. The earlier
 * `mutable_data<T>(place)` call that preceded each `Alloc` was redundant and
 * has been dropped, so every buffer is requested once, from the device
 * context.
 *
 * @param var        Tensors to allocate; must be non-null. Elements are
 *                   dereferenced without a null check.
 * @param data_type  Per-tensor dtype codes compared against
 *                   framework::proto::VarType values. It is indexed with the
 *                   same index as `var` and no bounds check is performed, so
 *                   it must hold at least `var->size()` entries.
 * @param dev_ctx    Device context used to perform the allocation.
 * @param place      No longer read by this function; kept so that existing
 *                   callers keep compiling.
 */
template <typename DeviceContext>
static void MutableMultiTypeData(
    std::vector<paddle::framework::LoDTensor*>* var,
    const std::vector<int>& data_type,
    const DeviceContext& dev_ctx,
    const platform::Place& place) {
  for (size_t i = 0; i < var->size(); i++) {
    auto* tensor = (*var)[i];
    const int dtype = data_type[i];

    if (dtype == framework::proto::VarType::FP32) {
      dev_ctx.template Alloc<float>(tensor, tensor->numel() * sizeof(float));
    } else if (dtype == framework::proto::VarType::FP16) {
      dev_ctx.template Alloc<paddle::platform::float16>(
          tensor, tensor->numel() * sizeof(paddle::platform::float16));
    } else if (dtype == framework::proto::VarType::FP64) {
      dev_ctx.template Alloc<double>(tensor, tensor->numel() * sizeof(double));
    }
    // Any other dtype code: intentionally no allocation (matches the
    // original if/else-if chain, which had no trailing else).
  }
}
...
...
@@ -52,8 +57,9 @@ class FusionGroupKernel : public framework::OpKernel<T> {
size_t
num_outs
=
outs
.
size
();
auto
place
=
ctx
.
GetPlace
();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
MutableMultiTypeData
(
&
outs
,
outs_dtype
,
place
);
MutableMultiTypeData
(
&
outs
,
outs_dtype
,
dev_ctx
,
place
);
std
::
string
func_name
=
ctx
.
Attr
<
std
::
string
>
(
"func_name"
);
platform
::
DeviceCode
*
dev_code
=
...
...
paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc
浏览文件 @
657c69bc
...
...
@@ -30,7 +30,8 @@ class TransposeFlattenConcatFusionKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
ins
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
GPUContext
>();
dev_ctx
.
Alloc
<
T
>
(
out
,
out
->
numel
()
*
sizeof
(
T
));
auto
odims
=
out
->
dims
();
std
::
vector
<
int
>
trans_axis
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"trans_axis"
);
...
...
@@ -52,7 +53,6 @@ class TransposeFlattenConcatFusionKernel : public framework::OpKernel<T> {
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
out_desc
));
cudnnDataType_t
cudnn_dtype
=
CudnnDataType
<
T
>::
type
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
GPUContext
>();
auto
handle
=
dev_ctx
.
cudnn_handle
();
T
*
odata
=
out
->
data
<
T
>
();
...
...
paddle/fluid/operators/fused/skip_layernorm_op.cu
浏览文件 @
657c69bc
...
...
@@ -44,7 +44,8 @@ class SkipLayerNormKernel : public framework::OpKernel<T> {
auto
*
out
=
context
.
Output
<
framework
::
Tensor
>
(
"Out"
);
out
->
Resize
(
X
->
dims
());
auto
*
output_d
=
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
&
dev_ctx
=
context
.
template
device_context
<
phi
::
GPUContext
>();
auto
*
output_d
=
dev_ctx
.
Alloc
<
T
>
(
out
,
out
->
numel
()
*
sizeof
(
T
));
size_t
num
=
1
;
for
(
size_t
i
=
0
;
i
<
X
->
dims
().
size
();
i
++
)
{
...
...
paddle/fluid/operators/fused/yolo_box_head_op.cu
浏览文件 @
657c69bc
...
...
@@ -81,7 +81,7 @@ class YoloBoxHeadKernel : public framework::OpKernel<T> {
const
int
grid_size_y
=
h
;
const
int
anchors_num
=
anchors
.
size
()
/
2
;
const
T
*
input_data
=
x
->
data
<
T
>
();
T
*
output_data
=
out
->
mutable_data
<
T
>
(
context
.
GetPlace
(
));
T
*
output_data
=
device_ctx
.
Alloc
<
T
>
(
out
,
out
->
numel
()
*
sizeof
(
T
));
auto
stream
=
device_ctx
.
stream
();
const
int
volume
=
x_dims
[
1
]
*
h
*
w
;
dim3
block
(
16
,
16
,
4
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录