机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit a2980169 (unverified)
Authored on Aug 02, 2022 by xiaoguoguo626807; committed via GitHub on Aug 02, 2022.
Parent: 942ff89f

[Eager]Menual fused_gemm_epilogue (#44748)

* manuel_fused_gemm_epilogue
Showing 8 changed files with 385 additions and 1 deletion (+385 −1):
- paddle/fluid/eager/api/manual/fluid_manual/dygraph_forward_api.h (+6 −0)
- paddle/fluid/eager/api/manual/fluid_manual/forwards/CMakeLists.txt (+1 −0)
- paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc (+130 −0)
- paddle/fluid/eager/api/manual/fluid_manual/nodes/CMakeLists.txt (+1 −0)
- paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_gemm_epilogue_node.cc (+88 −0)
- paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h (+64 −0)
- paddle/fluid/eager/auto_code_generator/eager_generator.cc (+2 −1)
- python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_op.py (+93 −0)
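For orientation, the op being wired up here computes a matmul with a fused bias-add and optional activation epilogue, i.e. `Out = activation(X @ Y + Bias)`. A minimal NumPy sketch of that computation (the helper name `fused_gemm_epilogue_ref` is hypothetical; the activation choices and the tanh-approximation GELU mirror the reference helpers in the test diff below):

```python
import numpy as np

def fused_gemm_epilogue_ref(x, y, bias, activation='none'):
    # out = activation(x @ y + bias): the computation the fused kernel performs
    out = np.matmul(x, y) + bias
    if activation == 'relu':
        return np.maximum(out, 0.0)
    if activation == 'gelu':
        # tanh-approximation GELU, as in the test's gelu() helper
        return 0.5 * out * (1.0 + np.tanh(
            np.sqrt(2 / np.pi) * (out + 0.044715 * np.power(out, 3))))
    return out
```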
paddle/fluid/eager/api/manual/fluid_manual/dygraph_forward_api.h

```diff
@@ -101,3 +101,9 @@ fused_attention_dygraph_function(
     const paddle::experimental::Tensor& Ln2Scale,
     const paddle::experimental::Tensor& Ln2Bias,
     const paddle::framework::AttributeMap& attr_map);
+
+paddle::experimental::Tensor fused_gemm_epilogue_dygraph_function(
+    const paddle::experimental::Tensor& X,
+    const paddle::experimental::Tensor& Y,
+    const paddle::experimental::Tensor& Bias,
+    const paddle::framework::AttributeMap& attr_map);
```
paddle/fluid/eager/api/manual/fluid_manual/forwards/CMakeLists.txt

```diff
@@ -2,4 +2,5 @@ set(fluid_manual_functions
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc
     PARENT_SCOPE)
```
paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc (new file, mode 100644)

```cpp
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "paddle/fluid/eager/amp_auto_cast.h"
#include "paddle/fluid/eager/amp_utils.h"
#include "paddle/fluid/eager/api/manual/fluid_manual/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"

#pragma GCC diagnostic ignored "-Wunused-variable"

paddle::experimental::Tensor fused_gemm_epilogue_dygraph_function(
    const paddle::experimental::Tensor& X,
    const paddle::experimental::Tensor& Y,
    const paddle::experimental::Tensor& Bias,
    const paddle::framework::AttributeMap& attr_map) {
  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "fused_gemm_epilogue dygraph",
      paddle::platform::TracerEventType::Operator,
      1);
  VLOG(3) << "Running Eager Forward Op: fused_gemm_epilogue";

  // Dygraph Forward Pass
  if (egr::Controller::Instance().GetAMPLevel() !=
      paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";
    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_tensors_vector = {{X}, {Y}, {Bias}};

    auto amp_dst_dtype =
        egr::GetAmpDestDtype("fused_gemm_epilogue", amp_tensors_vector);

    auto NEW_X =
        egr::AmpAutoCast("X", X, amp_dst_dtype, "fused_gemm_epilogue");
    auto NEW_Y =
        egr::AmpAutoCast("Y", Y, amp_dst_dtype, "fused_gemm_epilogue");
    auto NEW_Bias =
        egr::AmpAutoCast("Bias", Bias, amp_dst_dtype, "fused_gemm_epilogue");

    {
      paddle::imperative::AutoCastGuard guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return fused_gemm_epilogue_dygraph_function(
          NEW_X, NEW_Y, NEW_Bias, attr_map);
    }
  }

  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>
      ins = {{"X", egr::EagerUtils::TrySyncToVars(X)},
             {"Y", egr::EagerUtils::TrySyncToVars(Y)},
             {"Bias", egr::EagerUtils::TrySyncToVars(Bias)}};

  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>
      outs = {{"Out",
               {std::make_shared<egr::EagerVariable>(
                   egr::Controller::Instance().GenerateUniqueName())}}};

  // Prepare Autograd Meta
  egr::AutogradMeta* p_autograd_X =
      egr::EagerUtils::nullable_autograd_meta(X);
  egr::AutogradMeta* p_autograd_Y =
      egr::EagerUtils::nullable_autograd_meta(Y);
  egr::AutogradMeta* p_autograd_Bias =
      egr::EagerUtils::nullable_autograd_meta(Bias);

  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(
      trace_backward, p_autograd_X, p_autograd_Y, p_autograd_Bias);

  paddle::framework::AttributeMap attrs = attr_map;
  paddle::framework::AttributeMap default_attrs;
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "fused_gemm_epilogue",
      ins,
      outs,
      attrs,
      egr::Controller::Instance().GetExpectedPlace(),
      &default_attrs,
      true,
      {});

  paddle::experimental::Tensor Out;
  egr::EagerUtils::GetOutput(outs["Out"][0], &Out);

  {
    paddle::platform::RecordEvent node_creation_record_event(
        "fused_gemm_epilogue node_creation",
        paddle::platform::TracerEventType::OperatorInner,
        1);
    egr::AutogradMeta* p_autograd_Out = egr::EagerUtils::autograd_meta(&Out);
    if (require_any_grad) {
      VLOG(6) << " Construct Grad for fused_gemm_epilogue ";
      egr::EagerUtils::PassStopGradient(false, p_autograd_Out);
      // Create GradOpNode
      auto grad_node = std::shared_ptr<fused_gemm_epilogueGradNodeCompat>(
          new fused_gemm_epilogueGradNodeCompat(1, 3));

      // Set Attributes
      grad_node->SetAttrMap(std::move(attrs));
      grad_node->SetDefaultAttrMap(std::move(default_attrs));

      // Set Tensor Wrappers
      grad_node->SetTensorWrapperX(X);
      grad_node->SetTensorWrapperY(Y);

      grad_node->SetGradOutMeta(X, 0);
      grad_node->SetGradOutMeta(Y, 1);
      grad_node->SetGradOutMeta(Bias, 2);

      egr::EagerUtils::SetOutRankWithSlot(p_autograd_Out, 0);
      egr::EagerUtils::SetHistory(p_autograd_Out, grad_node);
      grad_node->SetGradInMeta(Out, 0);
      egr::EagerUtils::CheckAndRetainGrad(Out);
    }
  }

  return Out;
}
```
paddle/fluid/eager/api/manual/fluid_manual/nodes/CMakeLists.txt

```diff
@@ -2,4 +2,5 @@ set(fluid_manual_nodes
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_gate_attention_node.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_feedforward_node.cc
     ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_attention_node.cc
+    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_gemm_epilogue_node.cc
     PARENT_SCOPE)
```
paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_gemm_epilogue_node.cc (new file, mode 100644)

```cpp
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "glog/logging.h"
#include "paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/all.h"

paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                     egr::kSlotSmallVectorSize>
fused_gemm_epilogueGradNodeCompat::operator()(
    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>& grads,
    bool create_graph,
    bool is_new_grad) {
  const auto& out_metas = OutputMeta();
  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                       egr::kSlotSmallVectorSize>
      outputs(3);
  VLOG(3) << "Running Eager Backward Node: fused_gemm_epilogueGradNodeCompat";

  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                       egr::kSlotSmallVectorSize>
      hooked_grads0 =
          fused_gemm_epilogueGradNodeCompat::ApplyGradientHooks(grads);

  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>
      ins0 = {{"DOut", egr::EagerUtils::TrySyncToVars(hooked_grads0[0])},
              {"X",
               egr::EagerUtils::TrySyncToVars(
                   egr::EagerUtils::RecoverTensorWrapper(&this->X_))},
              {"Y",
               egr::EagerUtils::TrySyncToVars(
                   egr::EagerUtils::RecoverTensorWrapper(&this->Y_))}};

  std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>>
      outs0;

  if ((!out_metas[2].empty()) && (!(out_metas[2][0].IsStopGradient()))) {
    outs0.insert({"DBias",
                  {std::make_shared<egr::EagerVariable>(
                      egr::Controller::Instance().GenerateUniqueName())}});
  }
  if ((!out_metas[0].empty()) && (!(out_metas[0][0].IsStopGradient()))) {
    outs0.insert({"DX",
                  {std::make_shared<egr::EagerVariable>(
                      egr::Controller::Instance().GenerateUniqueName())}});
  }
  if ((!out_metas[1].empty()) && (!(out_metas[1][0].IsStopGradient()))) {
    outs0.insert({"DY",
                  {std::make_shared<egr::EagerVariable>(
                      egr::Controller::Instance().GenerateUniqueName())}});
  }

  auto& attrs_map0 = this->attr_map_;
  // Pass the entire attribute map to TraceOp
  // The underlying kernel will pickup whatever attribute they need at runtime
  egr::Controller::Instance().GetCurrentTracer()->TraceOp(
      "fused_gemm_epilogue_grad",
      ins0,
      outs0,
      attrs_map0,
      egr::Controller::Instance().GetExpectedPlace(),
      &this->default_attr_map_,
      true,
      {});

  if (outs0.find("DBias") != outs0.end()) {
    outputs[2] = egr::EagerUtils::GetOutputs(outs0["DBias"]);
  }
  if (outs0.find("DX") != outs0.end()) {
    outputs[0] = egr::EagerUtils::GetOutputs(outs0["DX"]);
  }
  if (outs0.find("DY") != outs0.end()) {
    outputs[1] = egr::EagerUtils::GetOutputs(outs0["DY"]);
  }

  if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&outputs);
  return outputs;
}
```
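The backward node above maps the upstream gradient `DOut` onto the slots `DX`, `DY`, and `DBias`, skipping any slot whose output has stop-gradient set. For the no-transpose case exercised by the new test, these are the standard matmul gradients; a minimal NumPy sketch (hypothetical helper name, mirroring the `matmul_grad` reference added to the test file below):

```python
import numpy as np

def fused_gemm_epilogue_grad_ref(x, y, dout):
    # Gradients of out = x @ y + bias with trans_x = trans_y = False
    dx = np.matmul(dout, y.T)      # "DX"
    dy = np.matmul(x.T, dout)      # "DY"
    dbias = np.sum(dout, axis=0)   # "DBias": bias is broadcast over rows
    return dx, dy, dbias
```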
paddle/fluid/eager/api/manual/fluid_manual/nodes/nodes.h

```diff
@@ -531,3 +531,67 @@ class fused_attentionGradNodeCompat : public egr::GradNodeBase {
   paddle::framework::AttributeMap attr_map_;
   paddle::framework::AttributeMap default_attr_map_;
 };
+
+class fused_gemm_epilogueGradNodeCompat : public egr::GradNodeBase {
+ public:
+  fused_gemm_epilogueGradNodeCompat() : egr::GradNodeBase() {
+    VLOG(7) << " Construct fused_gemm_epilogueGradNodeCompat ";
+  }
+  fused_gemm_epilogueGradNodeCompat(size_t bwd_in_slot_num,
+                                    size_t bwd_out_slot_num)
+      : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {
+    VLOG(7) << " Construct fused_gemm_epilogueGradNodeCompat ";
+  }
+  ~fused_gemm_epilogueGradNodeCompat() override {
+    VLOG(6) << " Destruct fused_gemm_epilogueGradNodeCompat ";
+  }
+
+  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               egr::kSlotSmallVectorSize>
+  operator()(
+      paddle::small_vector<std::vector<paddle::experimental::Tensor>,  // NOLINT
+                           egr::kSlotSmallVectorSize>& grads,  // NOLINT
+      bool create_graph = false,
+      bool is_new_grad = false) override;
+
+  void ClearTensorWrappers() override {
+    X_.clear();
+    Y_.clear();
+    SetIsTensorWrappersCleared(true);
+  }
+
+  std::string name() override { return "fused_gemm_epilogueGradNodeCompat"; }
+
+  std::shared_ptr<GradNodeBase> Copy() const override {
+    {
+      auto copied_node = std::shared_ptr<fused_gemm_epilogueGradNodeCompat>(
+          new fused_gemm_epilogueGradNodeCompat(*this));
+      return copied_node;
+    }
+  }
+
+  // SetX, SetY, ...
+  void SetTensorWrapperX(const paddle::experimental::Tensor& X) {
+    X_ = egr::TensorWrapper(X, false);
+  }
+  void SetTensorWrapperY(const paddle::experimental::Tensor& Y) {
+    Y_ = egr::TensorWrapper(Y, false);
+  }
+
+  // SetAttrMap
+  void SetAttrMap(paddle::framework::AttributeMap&& attr_map) {
+    attr_map_ = std::move(attr_map);
+  }
+  void SetDefaultAttrMap(paddle::framework::AttributeMap&& default_attr_map) {
+    default_attr_map_ = std::move(default_attr_map);
+  }
+
+ private:
+  // TensorWrappers
+  egr::TensorWrapper X_;
+  egr::TensorWrapper Y_;
+
+  // Attribute Map
+  paddle::framework::AttributeMap attr_map_;
+  paddle::framework::AttributeMap default_attr_map_;
+};
```
paddle/fluid/eager/auto_code_generator/eager_generator.cc

```diff
@@ -54,7 +54,8 @@ static std::unordered_set<std::string> ops_to_fill_zero_for_empty_grads = {
 static std::unordered_set<std::string> black_ops_list = {"run_program",
                                                          "fused_gate_attention",
                                                          "fused_feedforward",
-                                                         "fused_attention"};
+                                                         "fused_attention",
+                                                         "fused_gemm_epilogue"};
 
 static std::string LegalizeVariableName(const std::string& var_name) {
   std::string ret = var_name;
```
python/paddle/fluid/tests/unittests/test_fused_gemm_epilogue_op.py

```diff
@@ -22,6 +22,13 @@ import paddle.fluid.core as core
 from op_test import OpTest, skip_check_grad_ci, skip_check_inplace_ci
 
 
+def is_fused_gemm_epilogue_supported():
+    if paddle.is_compiled_with_cuda() and not paddle.is_compiled_with_rocm():
+        return hasattr(core.eager.ops, 'fused_gemm_epilogue')
+    else:
+        return False
+
+
 def gelu(x):
     y_ref = 0.5 * x * (
         1.0 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3))))
@@ -480,6 +487,92 @@ class TestFuseGemmEpilogueOpNoneMMFP64(TestFuseGemmEpilogueOpNoneMMFP16):
         self.atol = 1e-6
 
 
+def matmul(x, y, bias, trans_x, trans_y):
+    x = np.array(x)
+    if trans_x:
+        x = np.ascontiguousarray(np.transpose(x))
+    if trans_y:
+        y = np.ascontiguousarray(np.transpose(y))
+    z = np.matmul(x, y)
+    if bias is None:
+        return z
+    else:
+        return z + bias
+
+
+def matmul_grad(x, y, bias, dz, trans_x, trans_y):
+    if trans_x:
+        if trans_y:
+            dx = matmul(y, dz, None, True, True)
+            dy = matmul(dz, x, None, True, True)
+        else:
+            dx = matmul(y, dz, None, False, True)
+            dy = matmul(x, dz, None, False, False)
+    else:
+        if trans_y:
+            dx = matmul(dz, y, None, False, False)
+            dy = matmul(dz, x, None, True, False)
+        else:
+            dx = matmul(dz, y, None, False, True)
+            dy = matmul(x, dz, None, True, False)
+    if bias is None:
+        dbias = None
+    else:
+        dbias = np.sum(dz, axis=0, keepdims=False)
+    return dx, dy, dbias
+
+
+@unittest.skipIf(
+    not is_fused_gemm_epilogue_supported(),
+    "fused_gemm_epilogue is only supported when CUDA version >= 11.6")
+class TestEagerFusedGemmEpilogue(unittest.TestCase):
+
+    def setUp(self):
+        paddle.set_device('gpu')
+
+    def test_case_act(self):
+        paddle.disable_static()
+        x_np = np.random.random((8, 4)).astype(np.float64) - 0.5
+        y_np = np.random.random((4, 128)).astype(np.float64) - 0.5
+        bias_np = np.random.random((128, )).astype(np.float64) - 0.5
+        x = paddle.to_tensor(x_np)
+        y = paddle.to_tensor(y_np)
+        bias = paddle.to_tensor(bias_np)
+        x.stop_gradient = False
+        y.stop_gradient = False
+
+        out1 = core.eager.ops.fused_gemm_epilogue(
+            x, y, bias, 'trans_x', False, 'trans_y', False, 'activation',
+            'none')
+        out2 = core.eager.ops.fused_gemm_epilogue(
+            x, y, bias, 'trans_x', False, 'trans_y', False, 'activation',
+            'relu')
+        out3 = core.eager.ops.fused_gemm_epilogue(
+            x, y, bias, 'trans_x', False, 'trans_y', False, 'activation',
+            'gelu')
+
+        out_np1 = get_output(x_np, y_np, bias_np, 'none')
+        out_np2 = get_output(x_np, y_np, bias_np, 'relu')
+        out_np3 = get_output(x_np, y_np, bias_np, 'gelu')
+
+        self.assertTrue(np.allclose(out1, out_np1))
+        self.assertTrue(np.allclose(out2, out_np2))
+        self.assertTrue(np.allclose(out3, out_np3))
+
+        out_grad_np1 = np.random.randint(
+            low=-20, high=20, size=out_np1.shape).astype(np.float64)
+        paddle.autograd.backward(
+            out1, grad_tensors=[paddle.to_tensor(out_grad_np1)])
+
+        x_grad_np, y_grad_np, bias_grad_np = matmul_grad(
+            x_np, y_np, bias_np, out_grad_np1, False, False)
+        self.assertTrue(np.allclose(x.grad.numpy(), x_grad_np))
+        self.assertEqual(y_grad_np.shape, y_np.shape)
+        self.assertTrue(np.allclose(y.grad.numpy(), y_grad_np))
+
+        paddle.enable_static()
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     np.random.seed(0)
```