Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
7eab0fa6
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7eab0fa6
编写于
10月 21, 2021
作者:
R
ronnywang
提交者:
GitHub
10月 21, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add swish_op for npu (#36579)
上级
856cb9c5
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
153 addition
and
0 deletion
+153
-0
paddle/fluid/operators/activation_op_npu.cc
paddle/fluid/operators/activation_op_npu.cc
+78
-0
python/paddle/fluid/tests/unittests/npu/test_swish_op_npu.py
python/paddle/fluid/tests/unittests/npu/test_swish_op_npu.py
+75
-0
未找到文件。
paddle/fluid/operators/activation_op_npu.cc
浏览文件 @
7eab0fa6
...
@@ -459,6 +459,78 @@ class SigmoidGradNPUKernel : public framework::OpKernel<T> {
...
@@ -459,6 +459,78 @@ class SigmoidGradNPUKernel : public framework::OpKernel<T> {
}
}
};
};
// Swish(x) = x * sigmoid(beta * x)
//
// Forward NPU kernel for the swish activation. Reads the input tensor "X" and
// the float attribute "beta", and writes the activation into the output
// tensor "Out". The computation is expressed as three NPU operators that are
// run, in order, on the stream taken from the NPU device context.
template <typename T>
class SwishNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto* input = ctx.Input<Tensor>("X");
    auto* result = ctx.Output<Tensor>("Out");
    const float beta = ctx.Attr<float>("beta");

    // Request storage of element type T for the output on the current place.
    result->mutable_data<T>(ctx.GetPlace());

    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();

    // Step 1: result = beta * X.
    const NpuOpRunner scale_op("Muls", {*input}, {*result},
                               {{"value", beta}});
    scale_op.Run(stream);

    // Step 2: result = sigmoid(result); "Out" is both source and destination.
    const NpuOpRunner sigmoid_op("Sigmoid", {*result}, {*result}, {});
    sigmoid_op.Run(stream);

    // Step 3: result = X * result, i.e. X * sigmoid(beta * X).
    const NpuOpRunner product_op("Mul", {*input, *result}, {*result});
    product_op.Run(stream);
  }
};
// Backward NPU kernel for swish, where Swish(x) = x * sigmoid(beta * x).
//
// Let s = sigmoid(beta * x) and y = x * s. Then
//   dy/dx = s + beta * y * (1 - s)
//         = s + (beta * y - s * (beta * y)).
// The kernel evaluates the second form with NPU operators and multiplies it
// by the incoming gradient:
//   dX = dOut * (s + (beta * y - s * (beta * y))).
//
// Inputs : "X", GradVarName("Out"); attribute "beta" (float).
// Output : GradVarName("X").
//
// Note: the beta factor on y is required for any beta != 1. Without it the
// expression collapses to s + y * (1 - s), which is only the correct
// derivative when beta == 1.
template <typename T>
class SwishGradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
    float beta = ctx.Attr<float>("beta");

    dx->mutable_data<T>(ctx.GetPlace());
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();

    // Temporaries with the same dims as X, placed on the current place.
    Tensor beta_x, sigmoid_out, swish_out;
    beta_x.mutable_data<T>(x->dims(), ctx.GetPlace());
    sigmoid_out.mutable_data<T>(x->dims(), ctx.GetPlace());
    swish_out.mutable_data<T>(x->dims(), ctx.GetPlace());

    // beta_x = beta * x
    const auto& muls_runner =
        NpuOpRunner("Muls", {*x}, {beta_x}, {{"value", beta}});
    muls_runner.Run(stream);

    // sigmoid_out = s = sigmoid(beta * x)
    const auto& sigmoid_runner =
        NpuOpRunner("Sigmoid", {beta_x}, {sigmoid_out}, {});
    sigmoid_runner.Run(stream);

    // swish_out = y = s * x
    const auto& mul_runner =
        NpuOpRunner("Mul", {sigmoid_out, *x}, {swish_out}, {});
    mul_runner.Run(stream);

    // swish_out = beta * y (scaled in place). This is the factor that makes
    // the gradient correct for beta != 1.
    const auto& muls_runner2 =
        NpuOpRunner("Muls", {swish_out}, {swish_out}, {{"value", beta}});
    muls_runner2.Run(stream);

    // dx = s * (beta * y)
    const auto& mul_runner1 =
        NpuOpRunner("Mul", {sigmoid_out, swish_out}, {*dx}, {});
    mul_runner1.Run(stream);

    // dx = beta * y - s * (beta * y)
    const auto& sub_runner =
        NpuOpRunner("Sub", {swish_out, *dx}, {*dx}, {});
    sub_runner.Run(stream);

    // dx = s + beta * y * (1 - s)
    const auto& add_runner =
        NpuOpRunner("Add", {sigmoid_out, *dx}, {*dx}, {});
    add_runner.Run(stream);

    // dx = dout * dy/dx
    const auto& mul_runner2 = NpuOpRunner("Mul", {*dout, *dx}, {*dx}, {});
    mul_runner2.Run(stream);
  }
};
// HardSwish = min(max(0, x+offset), threshold) * x / scale
// HardSwish = min(max(0, x+offset), threshold) * x / scale
template
<
typename
T
>
template
<
typename
T
>
class
HardSwishNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
HardSwishNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
...
@@ -936,6 +1008,12 @@ REGISTER_OP_NPU_KERNEL(
...
@@ -936,6 +1008,12 @@ REGISTER_OP_NPU_KERNEL(
ops
::
SigmoidGradNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
ops
::
SigmoidGradNPUKernel
<
paddle
::
platform
::
NPUDeviceContext
,
paddle
::
platform
::
float16
>
);
paddle
::
platform
::
float16
>
);
// Register the swish forward kernel for NPU with float and float16 elements.
REGISTER_OP_NPU_KERNEL(
    swish,
    ops::SwishNPUKernel<float>,
    ops::SwishNPUKernel<paddle::platform::float16>);
// Register the swish backward kernel for NPU with float and float16 elements.
REGISTER_OP_NPU_KERNEL(
    swish_grad,
    ops::SwishGradNPUKernel<float>,
    ops::SwishGradNPUKernel<paddle::platform::float16>);
REGISTER_OP_NPU_KERNEL
(
hard_swish
,
ops
::
HardSwishNPUKernel
<
float
>
,
REGISTER_OP_NPU_KERNEL
(
hard_swish
,
ops
::
HardSwishNPUKernel
<
float
>
,
ops
::
HardSwishNPUKernel
<
paddle
::
platform
::
float16
>
);
ops
::
HardSwishNPUKernel
<
paddle
::
platform
::
float16
>
);
...
...
python/paddle/fluid/tests/unittests/npu/test_swish_op_npu.py
0 → 100644
浏览文件 @
7eab0fa6
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
numpy
as
np
import
unittest
import
sys
sys
.
path
.
append
(
".."
)
from
paddle.fluid.tests.unittests.op_test
import
OpTest
import
paddle
import
paddle.fluid
as
fluid
from
test_activation_op
import
ref_swish
,
expit
# Switch Paddle to static-graph mode before the test classes are defined.
paddle.enable_static()

# Module-level seed constant. NOTE(review): the tests visible in this file
# seed NumPy with a literal 2048 and do not reference SEED; confirm whether
# anything else relies on it.
SEED = 1024
class TestSwishOp(OpTest):
    """Forward and gradient checks for the "swish" operator on NPU (float32).

    swish(x) = x * sigmoid(beta * x). The input is a fixed-seed random
    [10, 12] array drawn uniformly from [-1, 1).
    """

    def setUp(self):
        """Build the operator inputs, attributes and expected outputs."""
        self.op_type = "swish"
        self.set_npu()
        self.init_dtype()

        np.random.seed(2048)
        x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype)
        # NOTE(review): ref_swish is called without beta, so this expected
        # output presumably corresponds to beta == 1.0 only -- confirm against
        # test_activation_op before changing the 'beta' attribute below.
        out = ref_swish(x)

        self.inputs = {'X': x}
        self.attrs = {'beta': 1.0}
        self.outputs = {'Out': out}

    def test_check_output(self):
        """Compare the operator's forward result with the reference output."""
        self.check_output_with_place(self.place)

    def test_check_grad(self):
        """Compare the operator's gradient w.r.t. X with the analytic one."""
        beta = self.attrs['beta']
        out = self.outputs['Out']
        x = self.inputs['X']
        # d/dx [x * sigmoid(beta * x)]
        #   = beta * out + sigmoid(beta * x) * (1 - beta * out)
        # The sigmoid argument must be beta * x (not x) for the formula to
        # hold when beta != 1; for beta == 1.0 the value is unchanged.
        dx = beta * out + expit(beta * x) * (1 - beta * out)
        # Scale by the element count. Presumably the gradient check feeds a
        # mean-reduced loss -- confirm against OpTest.
        dx = dx / x.size

        self.check_grad_with_place(
            self.place, ['X'],
            'Out',
            max_relative_error=0.01,
            user_defined_grads=[dx])

    def set_npu(self):
        """Mark the test class as NPU-based and select NPU device 0."""
        self.__class__.use_npu = True
        self.place = paddle.NPUPlace(0)

    def init_dtype(self):
        """Select the element type used for the input; float32 here."""
        self.dtype = np.float32
class TestSwishOpFp16(TestSwishOp):
    """float16 variant of TestSwishOp.

    Reuses the parent's setup and gradient check; only the element type and
    the forward-check tolerance differ.
    """

    def init_dtype(self):
        """Use float16 inputs instead of the parent's float32."""
        self.dtype = np.float16

    def test_check_output(self):
        """Forward check with an absolute tolerance of 1e-3."""
        self.check_output_with_place(self.place, atol=1e-3)
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录