Unverified commit f71de378 authored by ykkk2333, committed by GitHub

add silu, silu_grad, unfold and unfold_grad xpu kernels (#48325)

* add stat tool

* add roll and roll_grad kernels and strided_slice and strided_slice_grad kernels, test=kunlun

* add silu, unfold and their grads, test=kunlun
Parent 518f9d81
@@ -131,6 +131,9 @@ XPUOpMap& get_kl2_ops() {
{"conv2d",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"conv3d_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"conv3d", {"conv3d",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})}, pOpKernelType(vartype::FP16, XPUPlace())})},
...@@ -282,6 +285,12 @@ XPUOpMap& get_kl2_ops() { ...@@ -282,6 +285,12 @@ XPUOpMap& get_kl2_ops() {
pOpKernelType(vartype::INT32, XPUPlace()), pOpKernelType(vartype::INT32, XPUPlace()),
pOpKernelType(vartype::INT8, XPUPlace()), pOpKernelType(vartype::INT8, XPUPlace()),
pOpKernelType(vartype::FP32, XPUPlace())})}, pOpKernelType(vartype::FP32, XPUPlace())})},
{"unfold",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"unfold_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"floor", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, {"floor", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"gather_grad", {"gather_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
...@@ -523,6 +532,12 @@ XPUOpMap& get_kl2_ops() { ...@@ -523,6 +532,12 @@ XPUOpMap& get_kl2_ops() {
{"sgd_dense_param_sparse_grad", {"sgd_dense_param_sparse_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})}, pOpKernelType(vartype::FP16, XPUPlace())})},
{"silu_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"silu",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"sigmoid_cross_entropy_with_logits_grad", {"sigmoid_cross_entropy_with_logits_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"sigmoid_cross_entropy_with_logits", {"sigmoid_cross_entropy_with_logits",
......
...@@ -367,6 +367,26 @@ struct XPURelu6GradFunctor : public funcs::BaseActivationFunctor<T> { ...@@ -367,6 +367,26 @@ struct XPURelu6GradFunctor : public funcs::BaseActivationFunctor<T> {
} }
}; };
template <typename T>
struct XPUSiluGradFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* dout,
DenseTensor* dx) const {
dev_ctx.template Alloc<T>(dx);
const XPUType* x_data = reinterpret_cast<const XPUType*>(x->data<T>());
const XPUType* y_grad = reinterpret_cast<const XPUType*>(dout->data<T>());
XPUType* x_grad = reinterpret_cast<XPUType*>(dx->data<T>());
int r = xpu::swish_grad(
dev_ctx.x_context(), x_data, y_grad, x_grad, dx->numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish_grad");
}
};
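Note: SiLU is Swish with β = 1, i.e. silu(x) = x·σ(x), which is why the forward and backward functors in this commit can delegate to the XDNN swish primitives (the assumption here is that xpu::swish/xpu::swish_grad fall back to β = 1 when no β arguments are supplied, as in the calls above). The gradient being computed is

$$\frac{d}{dx}\,\mathrm{silu}(x) = \sigma(x)\bigl(1 + x\,(1 - \sigma(x))\bigr), \qquad \sigma(x) = \frac{1}{1 + e^{-x}}.$$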
template <typename T>
struct XPUSigmoidGradFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
...
@@ -552,6 +572,7 @@ DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, XPUSqrtGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, XPUTanhGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, XPUReluGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, XPUSiluGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, XPULogGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, XPUSquareGradFunctor);
@@ -603,6 +624,12 @@ PD_REGISTER_KERNEL(relu_grad,
phi::ReluGradKernel,
float,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(silu_grad,
XPU,
ALL_LAYOUT,
phi::SiluGradKernel,
float,
phi::dtype::float16) {}
#define PD_REGISTER_ACTIVATION_GRAD_KERNEL(name, func) \
PD_REGISTER_KERNEL(name, XPU, ALL_LAYOUT, phi::func, float) {}
...
@@ -322,6 +322,24 @@ struct XPURelu6Functor : public funcs::BaseActivationFunctor<T> {
}
};
template <typename T>
struct XPUSiluFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
template <typename Context>
void operator()(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) const {
dev_ctx.template Alloc<T>(out);
const XPUType* x_data = reinterpret_cast<const XPUType*>(x.data<T>());
XPUType* y_data = reinterpret_cast<XPUType*>(out->data<T>());
auto xpu_context = dev_ctx.x_context();
int r =
xpu::swish(xpu_context, x_data, y_data, x.numel(), nullptr, nullptr);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish");
}
};
template <typename T>
struct XPUSigmoidFunctor : public funcs::BaseActivationFunctor<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
@@ -448,6 +466,7 @@ DEFINE_XPU_ACTIVATION_KERNEL(Sigmoid, XPUSigmoidFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Square, XPUSquareFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Sqrt, XPUSqrtFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Tanh, XPUTanhFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Silu, XPUSiluFunctor)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, XPUMishFunctor, threshold)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu,
@@ -486,6 +505,8 @@ void HardSwishRawKernel(const Context& dev_ctx,
PD_REGISTER_KERNEL(
relu, XPU, ALL_LAYOUT, phi::ReluKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(
silu, XPU, ALL_LAYOUT, phi::SiluKernel, float, phi::dtype::float16) {}
#define PD_REGISTER_ACTIVATION_KERNEL(name, func) \
PD_REGISTER_KERNEL(name, XPU, ALL_LAYOUT, phi::func, float) {}
...
@@ -168,6 +168,127 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
filter_grad);
}
template <typename T, typename Context>
void Conv3DGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
DenseTensor* input_grad,
DenseTensor* filter_grad) {
using XPUT = typename XPUTypeTrait<T>::Type;
std::vector<int> paddings = paddings_t;
std::vector<int> dilations = dilations_t;
// The filter and filter_grad will be reshaped in the calculations,
// so assigned copies are used here to avoid modifying the variables
// in the Scope.
if (!input_grad && !filter_grad) return;
phi::DDim in_data_dims =
phi::slice_ddim(input.dims(), 2, input.dims().size());
phi::DDim filter_data_dims =
phi::slice_ddim(filter.dims(), 2, filter.dims().size());
std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
std::vector<int> filter_shape = phi::vectorize<int>(filter.dims());
UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
int batch_size = static_cast<int>(input.dims()[0]);
int img_c = static_cast<int>(input.dims()[1]);
int img_d = static_cast<int>(input.dims()[2]);
int img_h = static_cast<int>(input.dims()[3]);
int img_w = static_cast<int>(input.dims()[4]);
int f = static_cast<int>(filter.dims()[0]);
bool is_ncdhw = true;
if (data_format == "NDHWC") {
img_c = static_cast<int>(input.dims()[4]);
img_d = static_cast<int>(input.dims()[1]);
img_h = static_cast<int>(input.dims()[2]);
img_w = static_cast<int>(input.dims()[3]);
is_ncdhw = false;
}
const XPUT* input_data = reinterpret_cast<const XPUT*>(input.data<T>());
const XPUT* filter_data = reinterpret_cast<const XPUT*>(filter.data<T>());
const XPUT* output_grad_data =
reinterpret_cast<const XPUT*>(out_grad.data<T>());
XPUT* input_grad_data = nullptr;
if (input_grad) {
dev_ctx.template Alloc<T>(input_grad);
input_grad_data = reinterpret_cast<XPUT*>(input_grad->data<T>());
}
XPUT* filter_grad_data = nullptr;
if (filter_grad) {
dev_ctx.template Alloc<T>(filter_grad);
filter_grad_data = reinterpret_cast<XPUT*>(filter_grad->data<T>());
}
xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
XPUT* filter_data_tmp;
XPUT* filter_grad_data_tmp;
const XPUT* filter_data_ptr = filter_data;
XPUT* filter_grad_data_ptr = filter_grad_data;
if (data_format == "NDHWC") {
filter_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp);
int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
filter_data,
filter_data_tmp,
filter_shape,
{0, 2, 3, 4, 1});
PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
filter_data_ptr = reinterpret_cast<const XPUT*>(filter_data_tmp);
if (filter_grad_data != nullptr) {
filter_grad_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
PADDLE_ENFORCE_XDNN_NOT_NULL(filter_grad_data_tmp);
filter_grad_data_ptr = filter_grad_data_tmp;
}
}
int r = xpu::conv3d_grad<XPUT, XPUT, XPUT, int16_t>(dev_ctx.x_context(),
input_data,
filter_data_ptr,
output_grad_data,
input_grad_data,
filter_grad_data_ptr,
batch_size,
img_c,
img_d,
img_h,
img_w,
f,
ksize,
strides,
paddings,
dilations,
groups,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
is_ncdhw);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv3d_grad");
if ((filter_grad_data_ptr != nullptr) && (data_format == "NDHWC")) {
std::vector<int> filter_shape_fhwc = {filter_shape[0],
filter_shape[2],
filter_shape[3],
filter_shape[4],
filter_shape[1]};
int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
filter_grad_data_ptr,
filter_grad_data,
filter_shape_fhwc,
{0, 4, 1, 2, 3});
PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
}
}
} // namespace phi
PD_REGISTER_KERNEL(conv2d_grad,
@@ -182,3 +303,9 @@ PD_REGISTER_KERNEL(depthwise_conv2d_grad,
ALL_LAYOUT,
phi::DepthwiseConvGradKernel,
float) {}
PD_REGISTER_KERNEL(conv3d_grad,
XPU,
ALL_LAYOUT,
phi::Conv3DGradKernel,
float,
phi::dtype::float16) {}
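A minimal NumPy sketch of the NDHWC filter handling in Conv3DGradKernel above, under the assumption (inferred from the two transposes) that Paddle stores the 3-D filter as F,C,D,H,W while xpu::conv3d_grad wants F,D,H,W,C when data_format == "NDHWC"; the filter gradient is produced in that layout and transposed back with the inverse permutation:

```python
import numpy as np

# Filter as stored by Paddle: F, C, D, H, W (hypothetical sizes).
w = np.random.rand(8, 3, 2, 2, 2)
# Perm {0, 2, 3, 4, 1}: move channels last for the NDHWC code path.
w_ndhwc = w.transpose(0, 2, 3, 4, 1)
# Stand-in for the filter gradient computed in the F, D, H, W, C layout.
grad_ndhwc = np.ones_like(w_ndhwc)
# Perm {0, 4, 1, 2, 3} is the inverse: back to the stored F, C, D, H, W.
grad_fcdhw = grad_ndhwc.transpose(0, 4, 1, 2, 3)
assert grad_fcdhw.shape == w.shape
```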
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/unfold_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/unfold_functor.h"
namespace phi {
template <typename T, typename Context>
void UnfoldGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const std::vector<int>& kernel_sizes,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& dilations,
DenseTensor* x_grad) {
using XPUType = typename XPUTypeTrait<T>::Type;
ctx.template Alloc<T>(x_grad);
const std::string data_format = phi::DataLayoutToString(x.layout());
bool is_nchw = data_format == "NCHW";
PADDLE_ENFORCE_EQ(is_nchw,
true,
phi::errors::PreconditionNotMet(
"Unfold grad op only supports datalayout == NCHW"));
auto x_dims = x_grad->dims();
int n = static_cast<int>(x_dims[0]);
int c = static_cast<int>(x_dims[1]);
int h = static_cast<int>(x_dims[2]);
int w = static_cast<int>(x_dims[3]);
int out_height = phi::funcs::CalcOutputSize(x_dims[2],
kernel_sizes[0],
dilations[0],
paddings[0],
paddings[2],
strides[0]);
int out_width = phi::funcs::CalcOutputSize(x_dims[3],
kernel_sizes[1],
dilations[1],
paddings[1],
paddings[3],
strides[1]);
xpu::ctx_guard RAII_GUARD(ctx.x_context());
XPUType* out_grad_trans =
RAII_GUARD.alloc_l3_or_gm<XPUType>(out_grad.numel());
int r = xpu::transpose(
ctx.x_context(),
reinterpret_cast<const XPUType*>(out_grad.data<T>()),
out_grad_trans,
{n, c, kernel_sizes[0], kernel_sizes[1], out_height, out_width},
{0, 4, 5, 1, 2, 3});
PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
r = xpu::col2im(ctx.x_context(),
out_grad_trans,
reinterpret_cast<XPUType*>(x_grad->data<T>()),
n,
c,
h,
w,
kernel_sizes,
strides,
paddings,
dilations,
is_nchw);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "col2im");
}
} // namespace phi
PD_REGISTER_KERNEL(unfold_grad,
XPU,
ALL_LAYOUT,
phi::UnfoldGradKernel,
float,
phi::dtype::float16) {}
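For reference, the phi::funcs::CalcOutputSize calls above follow the usual dilated-window output arithmetic (the same formula the Python unfold test below reproduces with its dkernel_h/dkernel_w variables):

$$o = \left\lfloor \frac{i + p_{\mathrm{begin}} + p_{\mathrm{end}} - \bigl(d(k-1)+1\bigr)}{s} \right\rfloor + 1,$$

where i is the input height or width, k the kernel size, d the dilation, p the paddings on either side, and s the stride. The backward pass is the exact mirror of the forward unfold: out_grad, viewed as [n, c, kh, kw, oh, ow], is transposed with perm {0, 4, 5, 1, 2, 3} to [n, oh, ow, c, kh, kw], and col2im then accumulates the overlapping patches back into x_grad.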
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/unfold_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/unfold_functor.h"
namespace phi {
template <typename T, typename Context>
void UnfoldKernel(const Context& ctx,
const DenseTensor& x,
const std::vector<int>& kernel_sizes,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& dilations,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
ctx.template Alloc<T>(out);
const std::string data_format = phi::DataLayoutToString(x.layout());
bool is_nchw = data_format == "NCHW";
PADDLE_ENFORCE_EQ(is_nchw,
true,
phi::errors::PreconditionNotMet(
"Unfold op only supports datalayout == NCHW"));
auto x_dims = x.dims();
int n = static_cast<int>(x_dims[0]);
int c = static_cast<int>(x_dims[1]);
int h = static_cast<int>(x_dims[2]);
int w = static_cast<int>(x_dims[3]);
int out_height = phi::funcs::CalcOutputSize(x_dims[2],
kernel_sizes[0],
dilations[0],
paddings[0],
paddings[2],
strides[0]);
int out_width = phi::funcs::CalcOutputSize(x_dims[3],
kernel_sizes[1],
dilations[1],
paddings[1],
paddings[3],
strides[1]);
xpu::ctx_guard RAII_GUARD(ctx.x_context());
XPUType* out_pre_trans = RAII_GUARD.alloc_l3_or_gm<XPUType>(out->numel());
int r = xpu::im2col(ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
out_pre_trans,
n,
c,
h,
w,
kernel_sizes,
strides,
paddings,
dilations,
is_nchw);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "im2col");
r = xpu::transpose(
ctx.x_context(),
out_pre_trans,
reinterpret_cast<XPUType*>(out->data<T>()),
{n, out_height, out_width, c, kernel_sizes[0], kernel_sizes[1]},
{0, 3, 4, 5, 1, 2});
PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
}
} // namespace phi
PD_REGISTER_KERNEL(
unfold, XPU, ALL_LAYOUT, phi::UnfoldKernel, float, phi::dtype::float16) {}
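And a self-contained NumPy reference for the forward kernel (a sketch under the assumption that xpu::im2col with is_nchw fills an [n, oh, ow, c, kh, kw] buffer, which the perm {0, 3, 4, 5, 1, 2} transpose reorders into the [n, c*kh*kw, oh*ow] layout unfold returns; unfold_ref is a hypothetical helper, not a Paddle API):

```python
import numpy as np

def unfold_ref(x, kh, kw, stride=1):
    # x: [n, c, h, w]; no padding or dilation to keep the sketch short.
    n, c, h, w = x.shape
    oh = (h - kh) // stride + 1
    ow = (w - kw) // stride + 1
    # im2col-style buffer: one [c, kh, kw] patch per output position.
    col = np.empty((n, oh, ow, c, kh, kw), x.dtype)
    for i in range(oh):
        for j in range(ow):
            hs, ws = i * stride, j * stride
            col[:, i, j] = x[:, :, hs:hs + kh, ws:ws + kw]
    # Perm {0, 3, 4, 5, 1, 2}, then flatten to [n, c*kh*kw, oh*ow].
    return col.transpose(0, 3, 4, 5, 1, 2).reshape(n, c * kh * kw, oh * ow)

x = np.arange(2 * 3 * 4 * 4, dtype=np.float32).reshape(2, 3, 4, 4)
assert unfold_ref(x, 2, 2).shape == (2, 3 * 2 * 2, 3 * 3)
```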
@@ -19,6 +19,9 @@ import numpy as np
sys.path.append("..")
import paddle
import paddle.nn.functional as F
from op_test import OpTest
from op_test_xpu import XPUOpTest
from xpu.get_test_cover_info import (
@@ -86,6 +89,87 @@ for stype in support_types:
create_test_class(globals(), XPUTestExpOP, stype)
class XPUTestSiluOP(XPUOpTestWrapper):
def __init__(self):
self.op_name = 'silu'
self.use_dynamic_create_class = False
class XPUTestSilu(TestActivationOPBase):
def set_case(self):
self.op_type = "silu"
self.dtype = self.in_type
self.init_shape()
np.random.seed(1024)
x = np.random.uniform(-1, 1, self.shape).astype(self.dtype)
out = x / (np.exp(-x) + 1)
self.inputs = {'X': x}
self.outputs = {'Out': out}
self.attrs = {'use_xpu': True}
def test_check_grad(self):
self.check_grad_with_place(self.place, ['X'], 'Out')
def init_shape(self):
self.shape = [11, 17]
class TestSilu_ZeroDim(XPUTestSilu):
def init_shape(self):
self.shape = []
class TestSiluAPI(unittest.TestCase):
# test paddle.nn.Silu, paddle.nn.functional.silu
def setUp(self):
self.x_np = np.random.uniform(-1, 1, [11, 17]).astype('float32')
self.place = paddle.XPUPlace(0)
def test_static_api(self):
paddle.enable_static()
with paddle.static.program_guard(paddle.static.Program()):
x = paddle.fluid.data('X', [11, 17])
out1 = F.silu(x)
m = paddle.nn.Silu()
out2 = m(x)
exe = paddle.static.Executor(self.place)
res = exe.run(feed={'X': self.x_np}, fetch_list=[out1, out2])
out_ref = self.x_np / (1 + np.exp(-self.x_np))
for r in res:
np.testing.assert_allclose(out_ref, r, rtol=1e-05)
def test_dygraph_api(self):
paddle.disable_static(self.place)
x = paddle.to_tensor(self.x_np)
out1 = F.silu(x)
m = paddle.nn.Silu()
out2 = m(x)
out_ref = self.x_np / (1 + np.exp(-self.x_np))
for r in [out1, out2]:
np.testing.assert_allclose(out_ref, r.numpy(), rtol=1e-05)
paddle.enable_static()
def test_errors(self):
with paddle.static.program_guard(paddle.static.Program()):
# The input type must be Variable.
self.assertRaises(TypeError, F.silu, 1)
# The input dtype must be float16, float32, float64.
x_int32 = paddle.fluid.data(
name='x_int32', shape=[11, 17], dtype='int32'
)
self.assertRaises(TypeError, F.silu, x_int32)
# support the input dtype is float16
x_fp16 = paddle.fluid.data(
name='x_fp16', shape=[11, 17], dtype='float16'
)
F.silu(x_fp16)
support_types = get_xpu_op_support_types('silu')
for stype in support_types:
create_test_class(globals(), XPUTestSiluOP, stype)
class XPUTestSigmoidOP(XPUOpTestWrapper):
def __init__(self):
self.op_name = 'sigmoid'
...
@@ -258,6 +258,38 @@ class XPUTestConv3DOp(XPUOpTestWrapper):
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad(self):
place = paddle.XPUPlace(0)
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad_with_place(
place,
{'Input', 'Filter'},
'Output',
max_relative_error=0.03,
)
def test_check_grad_no_filter(self):
place = paddle.XPUPlace(0)
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad_with_place(
place,
['Input'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Filter']),
)
def test_check_grad_no_input(self):
place = paddle.XPUPlace(0)
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad_with_place(
place,
['Filter'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Input']),
)
def init_test_case(self):
self.pad = [0, 0, 0]
self.stride = [1, 1, 1]
@@ -401,6 +433,32 @@ class XPUTestConv3DOp_v2(XPUOpTestWrapper):
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad(self):
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, {'Input', 'Filter'}, 'Output', max_relative_error=0.03
)
def test_check_grad_no_filter(self):
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place,
['Input'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Filter']),
)
def test_check_grad_no_input(self):
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place,
['Filter'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Input']),
)
def init_test_case(self):
self.stride = [1, 1, 1]
self.input_size = [2, 3, 4, 4, 4]  # NCDHW
...
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.fluid as fluid
import numpy as np
import sys
import unittest
sys.path.append("..")
from op_test_xpu import XPUOpTest
from xpu.get_test_cover_info import (
create_test_class,
get_xpu_op_support_types,
XPUOpTestWrapper,
)
paddle.enable_static()
class XPUTestUnfoldOp(XPUOpTestWrapper):
def __init__(self):
self.op_name = 'unfold'
self.use_dynamic_create_class = False
class TestUnfoldOp(XPUOpTest):
"""
This is for test on unfold Op
"""
def init_data(self):
self.batch_size = 3
self.input_channels = 3
self.input_height = 20
self.input_width = 20
self.kernel_sizes = [2, 2]
self.strides = [1, 1]
self.paddings = [1, 1, 1, 1]
self.dilations = [1, 1]
input_shape = [
self.batch_size,
self.input_channels,
self.input_height,
self.input_width,
]
self.x = np.random.rand(*input_shape).astype(self.dtype)
def calc_unfold(self):
output_shape = [0] * 3
output_shape[0] = self.batch_size
output_shape[1] = (
self.input_channels
* self.kernel_sizes[0]
* self.kernel_sizes[1]
)
dkernel_h = self.dilations[0] * (self.kernel_sizes[0] - 1) + 1
dkernel_w = self.dilations[1] * (self.kernel_sizes[1] - 1) + 1
out_height = (
int(
(
self.input_height
+ self.paddings[0]
+ self.paddings[2]
- dkernel_h
)
/ self.strides[0]
)
+ 1
)
out_width = (
int(
(
self.input_width
+ self.paddings[1]
+ self.paddings[3]
- dkernel_w
)
/ self.strides[1]
)
+ 1
)
output_shape[2] = out_height * out_width
output = np.zeros(output_shape).astype(np.float64)
# ------------ calculate output -------------- #
for i in range(output_shape[0]):
for j in range(output_shape[1]):
for k in range(output_shape[2]):
h_out = int(k / out_width)
w_out = k % out_width
w_offset = j % self.kernel_sizes[1]
h_offset = (
int(j / self.kernel_sizes[1]) % self.kernel_sizes[0]
)
c_in = int(
j / (self.kernel_sizes[0] * self.kernel_sizes[1])
)
h_in = (
h_offset * self.dilations[0]
+ h_out * self.strides[0]
- self.paddings[0]
)
w_in = (
w_offset * self.dilations[1]
+ w_out * self.strides[1]
- self.paddings[1]
)
if (h_in >= 0 and h_in < self.input_height) and (
w_in >= 0 and w_in < self.input_width
):
output[i, j, k] = self.x[i, c_in, h_in, w_in]
self.outputs = output
def set_data(self):
self.init_data()
self.calc_unfold()
self.inputs = {'X': self.x}
self.attrs = {
'kernel_sizes': self.kernel_sizes,
'paddings': self.paddings,
'dilations': self.dilations,
'strides': self.strides,
}
self.outputs = {'Y': self.outputs}
def setUp(self):
self.op_type = 'unfold'
self.dtype = self.in_type
self.set_data()
def test_check_output(self):
self.check_output_with_place(paddle.XPUPlace(0))
def test_check_grad(self):
self.check_grad_with_place(paddle.XPUPlace(0), ['X'], 'Y')
class TestUnfoldAPI(TestUnfoldOp):
"""
This is for test on paddle.nn.Unfold
"""
def setUp(self):
self.op_type = 'unfold'
self.dtype = self.in_type
self.set_data()
self.places = [paddle.XPUPlace(0)]
def test_dygraph(self):
for place in self.places:
with fluid.dygraph.guard(place):
input = fluid.dygraph.to_variable(self.inputs['X'])
m = paddle.nn.Unfold(**self.attrs)
m.eval()
result = m(input)
np.testing.assert_allclose(
result.numpy(), self.outputs['Y'], rtol=1e-05
)
def test_info(self):
str(paddle.nn.Unfold(**self.attrs))
support_types = get_xpu_op_support_types('unfold')
for stype in support_types:
create_test_class(globals(), XPUTestUnfoldOp, stype)
if __name__ == "__main__":
unittest.main()