Unverified commit 7eab0fa6, authored by ronnywang, committed by GitHub

add swish_op for npu (#36579)

Parent 856cb9c5
@@ -459,6 +459,78 @@ class SigmoidGradNPUKernel : public framework::OpKernel<T> {
  }
};
// Swish = x * sigmoid(beta * x)
template <typename T>
class SwishNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* out = ctx.Output<Tensor>("Out");
    float beta = ctx.Attr<float>("beta");

    out->mutable_data<T>(ctx.GetPlace());
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();

    // out = beta * x
    const auto& muls_runner =
        NpuOpRunner("Muls", {*x}, {*out}, {{"value", beta}});
    muls_runner.Run(stream);

    // out = sigmoid(beta * x)
    const auto& sigmoid_runner = NpuOpRunner("Sigmoid", {*out}, {*out}, {});
    sigmoid_runner.Run(stream);

    // out = x * sigmoid(beta * x)
    const auto& mul_runner = NpuOpRunner("Mul", {*x, *out}, {*out}, {});
    mul_runner.Run(stream);
  }
};
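For reference, the three NPU ops above simply compose the usual Swish definition. A minimal NumPy sketch of the same op chain (the helper name swish_reference is illustrative and not part of the patch):

import numpy as np

def swish_reference(x, beta=1.0):
    out = beta * x                    # Muls: out = beta * x
    out = 1.0 / (1.0 + np.exp(-out))  # Sigmoid: out = sigmoid(beta * x)
    return x * out                    # Mul: out = x * sigmoid(beta * x)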
template <typename T>
class SwishGradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
    float beta = ctx.Attr<float>("beta");

    dx->mutable_data<T>(ctx.GetPlace());
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();

    // dSwish/dx = sigmoid(beta * x)
    //             + beta * swish(x) * (1 - sigmoid(beta * x))
    Tensor beta_x, sigmoid_out, swish_out;
    beta_x.mutable_data<T>(x->dims(), ctx.GetPlace());
    sigmoid_out.mutable_data<T>(x->dims(), ctx.GetPlace());
    swish_out.mutable_data<T>(x->dims(), ctx.GetPlace());

    // beta_x = beta * x
    const auto& muls_runner =
        NpuOpRunner("Muls", {*x}, {beta_x}, {{"value", beta}});
    muls_runner.Run(stream);

    // sigmoid_out = sigmoid(beta * x)
    const auto& sigmoid_runner =
        NpuOpRunner("Sigmoid", {beta_x}, {sigmoid_out}, {});
    sigmoid_runner.Run(stream);

    // swish_out = x * sigmoid(beta * x)
    const auto& mul_runner =
        NpuOpRunner("Mul", {sigmoid_out, *x}, {swish_out}, {});
    mul_runner.Run(stream);

    // swish_out = beta * swish(x); without this scaling the gradient below
    // is only correct for beta == 1
    const auto& muls_runner2 =
        NpuOpRunner("Muls", {swish_out}, {swish_out}, {{"value", beta}});
    muls_runner2.Run(stream);

    // dx = beta * swish(x) * sigmoid(beta * x)
    const auto& mul_runner1 =
        NpuOpRunner("Mul", {sigmoid_out, swish_out}, {*dx}, {});
    mul_runner1.Run(stream);

    // dx = beta * swish(x) * (1 - sigmoid(beta * x))
    const auto& sub_runner = NpuOpRunner("Sub", {swish_out, *dx}, {*dx}, {});
    sub_runner.Run(stream);

    // dx = sigmoid(beta * x) + beta * swish(x) * (1 - sigmoid(beta * x))
    const auto& add_runner = NpuOpRunner("Add", {sigmoid_out, *dx}, {*dx}, {});
    add_runner.Run(stream);

    // dx = dout * dSwish/dx
    const auto& mul_runner2 = NpuOpRunner("Mul", {*dout, *dx}, {*dx}, {});
    mul_runner2.Run(stream);
  }
};
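The grad kernel assembles the analytic derivative dSwish/dx = sigmoid(beta * x) + beta * swish(x) * (1 - sigmoid(beta * x)) out of elementwise NPU ops. A NumPy sketch mirroring that chain (the helper name swish_grad_reference is illustrative only):

import numpy as np

def swish_grad_reference(x, dout, beta=1.0):
    sigmoid_out = 1.0 / (1.0 + np.exp(-beta * x))  # Muls + Sigmoid
    swish_out = beta * x * sigmoid_out             # Mul + Muls
    dx = sigmoid_out * swish_out                   # Mul
    dx = swish_out - dx                            # Sub
    dx = sigmoid_out + dx                          # Add
    return dout * dx                               # Mul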
// HardSwish = min(max(0, x+offset), threshold) * x / scale
template <typename T>
class HardSwishNPUKernel : public framework::OpKernel<T> {
@@ -936,6 +1008,12 @@ REGISTER_OP_NPU_KERNEL(
    ops::SigmoidGradNPUKernel<paddle::platform::NPUDeviceContext,
                              paddle::platform::float16>);
REGISTER_OP_NPU_KERNEL(swish, ops::SwishNPUKernel<float>,
ops::SwishNPUKernel<paddle::platform::float16>);
REGISTER_OP_NPU_KERNEL(swish_grad, ops::SwishGradNPUKernel<float>,
ops::SwishGradNPUKernel<paddle::platform::float16>);
REGISTER_OP_NPU_KERNEL(hard_swish, ops::HardSwishNPUKernel<float>,
                       ops::HardSwishNPUKernel<paddle::platform::float16>);
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import sys
sys.path.append("..")
from paddle.fluid.tests.unittests.op_test import OpTest
import paddle
import paddle.fluid as fluid
from test_activation_op import ref_swish, expit
paddle.enable_static()
SEED = 1024


class TestSwishOp(OpTest):
    def setUp(self):
        self.op_type = "swish"
        self.set_npu()
        self.init_dtype()
        np.random.seed(2048)
        x = np.random.uniform(-1, 1, [10, 12]).astype(self.dtype)
        out = ref_swish(x)
        self.inputs = {'X': x}
        self.attrs = {'beta': 1.0}
        self.outputs = {'Out': out}

    def test_check_output(self):
        self.check_output_with_place(self.place)

    def test_check_grad(self):
        beta = self.attrs['beta']
        out = self.outputs['Out']
        x = self.inputs['X']
        # Analytic swish gradient; expit(x) equals sigmoid(beta * x) here
        # because beta == 1.0.
        dx = beta * out + expit(x) * (1 - beta * out)
        # The numeric gradient is taken w.r.t. the mean of Out, hence the
        # division by x.size.
        dx = dx / x.size
        self.check_grad_with_place(
            self.place, ['X'],
            'Out',
            max_relative_error=0.01,
            user_defined_grads=[dx])

    def set_npu(self):
        self.__class__.use_npu = True
        self.place = paddle.NPUPlace(0)

    def init_dtype(self):
        self.dtype = np.float32


class TestSwishOpFp16(TestSwishOp):
    def test_check_output(self):
        self.check_output_with_place(self.place, atol=1e-3)

    def init_dtype(self):
        self.dtype = np.float16


if __name__ == '__main__':
    unittest.main()
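A minimal end-to-end usage sketch, assuming a PaddlePaddle build with NPU support where "npu:0" is a valid device string and paddle.nn.functional.swish dispatches to the kernel registered above (standalone, dynamic-graph mode):

import paddle
import paddle.nn.functional as F

paddle.set_device("npu:0")  # assumption: the NPU build exposes the "npu:0" device
x = paddle.uniform([10, 12], min=-1.0, max=1.0)
y = F.swish(x)  # y = x * sigmoid(x); runs the swish NPU kernel
print(y.shape)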