未验证 提交 db0ea0ce 编写于 作者: Y ykkk2333 提交者: GitHub

add masked_select_grad kernel (#48137)

* add stat tool

* add roll and roll_grad kernels and strided_slice and strided_slice_grad kernels, test=kunlun

* add masked_selected_grad kernel,test=kunlun
上级 a0f47350
......@@ -123,13 +123,17 @@ XPUOpMap& get_kl2_ops() {
pOpKernelType(vartype::FP16, XPUPlace())})},
{"concat",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
pOpKernelType(vartype::FP16, XPUPlace()),
pOpKernelType(vartype::INT64, XPUPlace())})},
{"conv2d_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"conv2d",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"conv3d",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"conv2d_transpose_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"conv2d_transpose",
......@@ -375,6 +379,12 @@ XPUOpMap& get_kl2_ops() {
pOpKernelType(vartype::INT64, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace()),
pOpKernelType(vartype::FP32, XPUPlace())})},
{"masked_select_grad",
XPUKernelSet({pOpKernelType(vartype::INT32, XPUPlace()),
pOpKernelType(vartype::INT64, XPUPlace()),
pOpKernelType(vartype::BOOL, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace()),
pOpKernelType(vartype::FP32, XPUPlace())})},
{"matmul_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
......@@ -502,6 +512,9 @@ XPUOpMap& get_kl2_ops() {
{"sgd",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"sgd_dense_param_sparse_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
{"sigmoid_cross_entropy_with_logits_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"sigmoid_cross_entropy_with_logits",
......
......@@ -50,6 +50,7 @@ void ConcatKernel(const Context& dev_ctx,
x[0]->dims().size()));
// If axis is 0, the lod of the output is not the same as inputs.
if (axis == 0 && x[0]->lod().size() > 0) {
size_t lod_size_0 = x[0]->lod().size();
size_t lod_size = lod_size_0;
......@@ -79,7 +80,9 @@ void ConcatKernel(const Context& dev_ctx,
}
}
}
dev_ctx.template Alloc<T>(out);
std::vector<std::vector<int>> xdims_list;
std::vector<const XPUType*> ptrs;
for (unsigned int i = 0; i < x.size(); ++i) {
......@@ -97,6 +100,7 @@ void ConcatKernel(const Context& dev_ctx,
PADDLE_ENFORCE_GT(xdims_list.size(),
0,
phi::errors::InvalidArgument("No tensor need concat"));
int r = xpu::concat<XPUType>(dev_ctx.x_context(),
ptrs,
reinterpret_cast<XPUType*>(out->data<T>()),
......@@ -107,5 +111,10 @@ void ConcatKernel(const Context& dev_ctx,
} // namespace phi
PD_REGISTER_KERNEL(
concat, XPU, ALL_LAYOUT, phi::ConcatKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(concat,
XPU,
ALL_LAYOUT,
phi::ConcatKernel,
float,
phi::dtype::float16,
int64_t) {}
......@@ -131,9 +131,96 @@ void DepthwiseConvKernel(const Context& dev_ctx,
out);
}
template <typename T, typename Context>
void Conv3DKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
DenseTensor* out) {
using XPUT = typename XPUTypeTrait<T>::Type;
std::vector<int> paddings = paddings_t;
std::vector<int> dilations = dilations_t;
// The filter will be reshaped in the calculations,
// so here use an assignment operation,
// that avoids modifying the variable in the Scope.
dev_ctx.template Alloc<T>(out);
phi::DDim in_data_dims =
phi::slice_ddim(input.dims(), 2, input.dims().size());
phi::DDim filter_data_dims =
phi::slice_ddim(filter.dims(), 2, filter.dims().size());
std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
int batch_size = static_cast<int>(input.dims()[0]);
int img_c = static_cast<int>(input.dims()[1]);
int img_d = static_cast<int>(input.dims()[2]);
int img_h = static_cast<int>(input.dims()[3]);
int img_w = static_cast<int>(input.dims()[4]);
int f = static_cast<int>(filter.dims()[0]);
bool is_ncdhw = true;
if (data_format == "NDHWC") {
img_c = static_cast<int>(input.dims()[4]);
img_d = static_cast<int>(input.dims()[1]);
img_h = static_cast<int>(input.dims()[2]);
img_w = static_cast<int>(input.dims()[3]);
is_ncdhw = false;
}
XPUT* output_data = reinterpret_cast<XPUT*>(out->data<T>());
const XPUT* filter_data = reinterpret_cast<const XPUT*>(filter.data<T>());
const XPUT* input_data = reinterpret_cast<const XPUT*>(input.data<T>());
xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
XPUT* filter_data_tmp;
const XPUT* filter_data_ptr = filter_data;
if (data_format == "NDHWC") {
filter_data_tmp = RAII_GUARD.alloc<XPUT>(filter.numel());
PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp);
std::vector<int> filter_shape = phi::vectorize<int>(filter.dims());
int r = xpu::transpose<XPUT>(dev_ctx.x_context(),
filter_data,
filter_data_tmp,
filter_shape,
{0, 2, 3, 4, 1});
PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose");
filter_data_ptr = reinterpret_cast<const XPUT*>(filter_data_tmp);
}
int r = xpu::conv3d<XPUT, XPUT, XPUT, int16_t>(dev_ctx.x_context(),
input_data,
filter_data_ptr,
output_data,
batch_size,
img_c,
img_d,
img_h,
img_w,
f,
ksize,
strides,
paddings,
dilations,
groups,
nullptr,
nullptr,
nullptr,
is_ncdhw);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv3d");
}
} // namespace phi
PD_REGISTER_KERNEL(
conv2d, XPU, ALL_LAYOUT, phi::ConvKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(
depthwise_conv2d, XPU, ALL_LAYOUT, phi::DepthwiseConvKernel, float) {}
PD_REGISTER_KERNEL(
conv3d, XPU, ALL_LAYOUT, phi::Conv3DKernel, float, phi::dtype::float16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/masked_select_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void MaskedSelectGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& mask,
const DenseTensor& out_grad,
DenseTensor* x_grad) {
using XPUType = typename XPUTypeTrait<T>::Type;
auto* mask_data = mask.data<bool>();
auto* input_data = reinterpret_cast<const XPUType*>(out_grad.data<T>());
auto* out_data =
reinterpret_cast<XPUType*>(dev_ctx.template Alloc<T>(x_grad));
auto mask_shape = phi::vectorize<int>(mask.dims());
auto xshape = phi::vectorize<int>(x_grad->dims());
int r = xpu::masked_select_grad(dev_ctx.x_context(),
input_data,
mask_data,
out_data,
xshape,
mask_shape,
1);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "masked_select_grad");
}
} // namespace phi
PD_REGISTER_KERNEL(masked_select_grad,
XPU,
ALL_LAYOUT,
phi::MaskedSelectGradKernel,
float,
phi::dtype::float16,
int,
bool,
int64_t) {}
......@@ -20,14 +20,14 @@
namespace phi {
template <typename T, typename Context>
void SGDDenseKernel(const Context &dev_ctx,
const DenseTensor &param,
const DenseTensor &learning_rate,
const DenseTensor &grad,
const paddle::optional<DenseTensor> &master_param,
void SGDDenseKernel(const Context& dev_ctx,
const DenseTensor& param,
const DenseTensor& learning_rate,
const DenseTensor& grad,
const paddle::optional<DenseTensor>& master_param,
bool multi_precision,
DenseTensor *param_out,
DenseTensor *master_param_out) {
DenseTensor* param_out,
DenseTensor* master_param_out) {
using XPUType = typename XPUTypeTrait<T>::Type;
auto sz = param_out->numel();
PADDLE_ENFORCE_EQ(
......@@ -49,37 +49,103 @@ void SGDDenseKernel(const Context &dev_ctx,
grad.numel(),
sz));
const T *lr_t = learning_rate.data<T>();
const T* lr_t = learning_rate.data<T>();
xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
const float *lr = nullptr;
const float* lr = nullptr;
if (std::is_same<T, dtype::float16>::value) {
float *lr_float = RAII_GUARD.alloc_l3_or_gm<float>(learning_rate.numel());
float* lr_float = RAII_GUARD.alloc_l3_or_gm<float>(learning_rate.numel());
int r = xpu::cast<XPUType, float>(dev_ctx.x_context(),
reinterpret_cast<const XPUType *>(lr_t),
reinterpret_cast<const XPUType*>(lr_t),
lr_float,
learning_rate.numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
lr = lr_float;
} else {
lr = reinterpret_cast<const float *>(lr_t);
lr = reinterpret_cast<const float*>(lr_t);
}
const T *param_data = param.data<T>();
const T *grad_data = grad.data<T>();
const T* param_data = param.data<T>();
const T* grad_data = grad.data<T>();
dev_ctx.template Alloc<T>(param_out);
T *out_data = param_out->data<T>();
T* out_data = param_out->data<T>();
int r = xpu::sgd(dev_ctx.x_context(),
reinterpret_cast<const XPUType *>(grad_data),
reinterpret_cast<const XPUType *>(param_data),
reinterpret_cast<const XPUType*>(grad_data),
reinterpret_cast<const XPUType*>(param_data),
lr,
reinterpret_cast<XPUType *>(out_data),
reinterpret_cast<XPUType*>(out_data),
sz);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "sgd");
}
template <typename T, typename Context>
void SGDDenseParamSparseGradKernel(
const Context& dev_ctx,
const DenseTensor& param,
const DenseTensor& learning_rate,
const SelectedRows& grad,
const paddle::optional<DenseTensor>& master_param,
bool multi_precision,
DenseTensor* param_out,
DenseTensor* master_param_out) {
using XPUType = typename XPUTypeTrait<T>::Type;
dev_ctx.template Alloc<T>(param_out);
PADDLE_ENFORCE_EQ(
&param,
param_out,
phi::errors::InvalidArgument(
"The input tensor Param of SgdOp should be equal with ParamOut "
"if variable's type is SelectedRows."));
auto in_height = grad.height();
auto out_dims = param_out->dims();
PADDLE_ENFORCE_EQ(in_height,
out_dims[0],
phi::errors::InvalidArgument(
"The input tensor Grad's height of SgdOp should be "
"equal with ParamOut's dims. But received Grad's "
"height [%s] and ParamOut's dims [%s]",
in_height,
out_dims[0]));
auto& in_value = grad.value();
auto& in_rows = grad.rows();
int64_t* in_rows_data = nullptr;
xpu::VectorParam<int64_t> in_rows_vec{
in_rows.data(), static_cast<int>(in_rows.size()), in_rows_data};
int64_t in_row_numel = in_value.numel() / in_rows.size();
PADDLE_ENFORCE_EQ(in_row_numel,
param_out->numel() / in_height,
phi::errors::InvalidArgument(
"The in_row_numel of SgdOp should be equal with "
"param_out's numel / in_height."));
auto* in_data = in_value.data<T>();
auto* out_data = param_out->data<T>();
int r = xpu::sparse_sgd<XPUType, int64_t>(
dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(in_data),
reinterpret_cast<const XPUType*>(param.data<T>()),
learning_rate.data<float>(),
in_rows_vec,
reinterpret_cast<XPUType*>(out_data),
in_row_numel,
in_rows.size());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "sparse_sgd");
}
} // namespace phi
PD_REGISTER_KERNEL(
sgd, XPU, ALL_LAYOUT, phi::SGDDenseKernel, phi::dtype::float16, float) {}
PD_REGISTER_KERNEL(sgd_dense_param_sparse_grad,
XPU,
ALL_LAYOUT,
phi::SGDDenseParamSparseGradKernel,
phi::dtype::float16,
float) {}
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("..")
import unittest
import numpy as np
from op_test_xpu import XPUOpTest
import paddle.fluid as fluid
import paddle
from xpu.get_test_cover_info import (
create_test_class,
XPUOpTestWrapper,
)
def conv3d_forward_naive(
input,
filter,
group,
conv_param,
padding_algorithm='EXPLICIT',
data_format="NCDHW",
):
if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]:
raise ValueError(
"Unknown Attr(padding_algorithm): '%s'. "
"It can only be 'SAME' or 'VALID'." % str(padding_algorithm)
)
if data_format not in ["NCDHW", "NDHWC"]:
raise ValueError(
"Unknown Attr(data_format): '%s' ."
"It can only be 'NCDHW' or 'NDHWC'." % str(data_format)
)
channel_last = data_format == "NDHWC"
if channel_last:
input = np.transpose(input, [0, 4, 1, 2, 3])
in_n, in_c, in_d, in_h, in_w = input.shape
f_n, f_c, f_d, f_h, f_w = filter.shape
out_n = in_n
out_c = f_n
assert f_c * group == in_c
assert np.mod(out_c, group) == 0
sub_out_c = out_c // group
sub_f_n = f_n // group
stride, pad, dilation = (
conv_param['stride'],
conv_param['pad'],
conv_param['dilations'],
)
# update pad and dilation
def _get_padding_with_SAME(input_shape, pool_size, pool_stride):
padding = []
for input_size, filter_size, stride_size in zip(
input_shape, pool_size, pool_stride
):
out_size = int((input_size + stride_size - 1) / stride_size)
pad_sum = np.max(
((out_size - 1) * stride_size + filter_size - input_size, 0)
)
pad_0 = int(pad_sum / 2)
pad_1 = int(pad_sum - pad_0)
padding.append(pad_0)
padding.append(pad_1)
return padding
ksize = filter.shape[2:5]
if padding_algorithm == "VALID":
pad = [0, 0, 0, 0, 0, 0]
elif padding_algorithm == "SAME":
dilation = [1, 1, 1]
input_data_shape = input.shape[2:5]
pad = _get_padding_with_SAME(input_data_shape, ksize, stride)
pad_d_0, pad_d_1 = pad[0], pad[0]
pad_h_0, pad_h_1 = pad[1], pad[1]
pad_w_0, pad_w_1 = pad[2], pad[2]
if len(pad) == 6:
pad_d_0, pad_d_1 = pad[0], pad[1]
pad_h_0, pad_h_1 = pad[2], pad[3]
pad_w_0, pad_w_1 = pad[4], pad[5]
out_d = (
1
+ (in_d + pad_d_0 + pad_d_1 - (dilation[0] * (f_d - 1) + 1))
// stride[0]
)
out_h = (
1
+ (in_h + pad_h_0 + pad_h_1 - (dilation[1] * (f_h - 1) + 1))
// stride[1]
)
out_w = (
1
+ (in_w + pad_w_0 + pad_w_1 - (dilation[2] * (f_w - 1) + 1))
// stride[2]
)
out = np.zeros((in_n, out_c, out_d, out_h, out_w))
d_bolck_d = dilation[0] * (f_d - 1) + 1
d_bolck_h = dilation[1] * (f_h - 1) + 1
d_bolck_w = dilation[2] * (f_w - 1) + 1
input_pad = np.pad(
input,
(
(0, 0),
(0, 0),
(pad_d_0, pad_d_1),
(pad_h_0, pad_h_1),
(pad_w_0, pad_w_1),
),
mode='constant',
constant_values=0,
)
filter_dilation = np.zeros((f_n, f_c, d_bolck_d, d_bolck_h, d_bolck_w))
filter_dilation[
:,
:,
0 : d_bolck_d : dilation[0],
0 : d_bolck_h : dilation[1],
0 : d_bolck_w : dilation[2],
] = filter
for d in range(out_d):
for i in range(out_h):
for j in range(out_w):
for g in range(group):
input_pad_masked = input_pad[
:,
g * f_c : (g + 1) * f_c,
d * stride[0] : d * stride[0] + d_bolck_d,
i * stride[1] : i * stride[1] + d_bolck_h,
j * stride[2] : j * stride[2] + d_bolck_w,
]
f_sub = filter_dilation[
g * sub_f_n : (g + 1) * sub_f_n, :, :, :, :
]
for k in range(sub_out_c):
out[:, g * sub_out_c + k, d, i, j] = np.sum(
input_pad_masked * f_sub[k, :, :, :, :],
axis=(1, 2, 3, 4),
)
if channel_last:
out = np.transpose(out, [0, 2, 3, 4, 1])
return out
def create_test_padding_SAME_class(parent):
class TestPaddingSMAECase(parent):
def init_paddings(self):
self.pad = [0, 0, 0]
self.padding_algorithm = "SAME"
cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp")
TestPaddingSMAECase.__name__ = cls_name
globals()[cls_name] = TestPaddingSMAECase
def create_test_padding_VALID_class(parent):
class TestPaddingVALIDCase(parent):
def init_paddings(self):
self.pad = [1, 1, 1]
self.padding_algorithm = "VALID"
cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp")
TestPaddingVALIDCase.__name__ = cls_name
globals()[cls_name] = TestPaddingVALIDCase
def create_test_channel_last_class(parent):
class TestChannelLastCase(parent):
def init_data_format(self):
self.data_format = "NDHWC"
def init_test_case_2(self):
N, C, D, H, W = self.input_size
self.input_size = [N, D, H, W, C]
cls_name = "{0}_{1}".format(parent.__name__, "ChannelLast")
TestChannelLastCase.__name__ = cls_name
globals()[cls_name] = TestChannelLastCase
paddle.enable_static()
class XPUTestConv3DOp(XPUOpTestWrapper):
def __init__(self):
self.op_name = 'conv3d'
self.use_dynamic_create_class = False
class TestConv3DOp(XPUOpTest):
def setUp(self):
self.dtype = self.in_type
self.op_type = "conv3d"
self.use_cudnn = False
self.use_mkldnn = False
self.data_format = "AnyLayout"
self.init_kernel_type()
self.init_group()
self.init_dilation()
self.init_test_case()
conv3d_param = {
'stride': self.stride,
'pad': self.pad,
'dilations': self.dilations,
}
np.random.seed(100)
input = np.random.random(self.input_size).astype(self.dtype)
filter = np.random.random(self.filter_size).astype(self.dtype)
output = conv3d_forward_naive(
input,
filter,
self.groups,
conv3d_param,
).astype(self.dtype)
self.inputs = {
'Input': XPUOpTest.np_dtype_to_fluid_dtype(input),
'Filter': XPUOpTest.np_dtype_to_fluid_dtype(filter),
}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'groups': self.groups,
'dilations': self.dilations,
'use_cudnn': self.use_cudnn,
'use_mkldnn': self.use_mkldnn,
'data_format': self.data_format,
}
self.outputs = {'Output': output}
def test_check_output(self):
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def init_test_case(self):
self.pad = [0, 0, 0]
self.stride = [1, 1, 1]
self.input_size = [2, 3, 4, 4, 4] # NCDHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3, 3]
def init_test_case_2(self):
pass
def init_dilation(self):
self.dilations = [1, 1, 1]
def init_group(self):
self.groups = 1
def init_kernel_type(self):
pass
class TestCase1(TestConv3DOp):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.input_size = [2, 3, 4, 4, 4] # NCDHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3, 3]
class TestWithGroup1(TestConv3DOp):
def init_group(self):
self.groups = 3
class TestWithGroup2(TestCase1):
def init_group(self):
self.groups = 3
class TestWith1x1(TestConv3DOp):
def init_test_case(self):
self.pad = [0, 0, 0]
self.stride = [1, 1, 1]
self.input_size = [2, 3, 4, 4, 4]
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [120, f_c, 1, 1, 1]
def init_dilation(self):
self.dilations = [1, 1, 1]
def init_group(self):
self.groups = 3
class TestWithInput1x1Filter1x1(TestConv3DOp):
def init_test_case(self):
self.pad = [0, 0, 0]
self.stride = [1, 1, 1]
self.input_size = [40, 3, 1, 1, 1]
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [120, f_c, 1, 1, 1]
def init_dilation(self):
self.dilations = [1, 1, 1]
def init_group(self):
self.groups = 3
class TestWithDilation(TestConv3DOp):
def init_test_case(self):
self.pad = [0, 0, 0]
self.stride = [1, 1, 1]
self.input_size = [2, 3, 6, 6, 6]
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [24, f_c, 2, 2, 2]
def init_dilation(self):
self.dilations = [2, 2, 2]
def init_group(self):
self.groups = 3
# ---- test asymmetric padding ----
class XPUTestConv3DOp_v2(XPUOpTestWrapper):
def __init__(self):
self.op_name = 'conv3d'
self.use_dynamic_create_class = False
class TestConv3DOp_2(XPUOpTest):
def setUp(self):
self.dtype = self.in_type
self.op_type = "conv3d"
self.use_cudnn = False
self.use_mkldnn = False
self.data_format = "NCDHW"
self.init_kernel_type()
self.init_group()
self.init_dilation()
self.init_data_format()
self.init_test_case()
self.init_paddings()
self.init_test_case_2()
conv3d_param = {
'stride': self.stride,
'pad': self.pad,
'dilations': self.dilations,
}
np.random.seed(100)
input = np.random.random(self.input_size).astype(self.dtype)
filter = np.random.random(self.filter_size).astype(self.dtype)
output = conv3d_forward_naive(
input,
filter,
self.groups,
conv3d_param,
self.padding_algorithm,
self.data_format,
).astype(self.dtype)
self.inputs = {
'Input': XPUOpTest.np_dtype_to_fluid_dtype(input),
'Filter': XPUOpTest.np_dtype_to_fluid_dtype(filter),
}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'padding_algorithm': self.padding_algorithm,
'groups': self.groups,
'dilations': self.dilations,
'use_cudnn': self.use_cudnn,
'use_mkldnn': self.use_mkldnn,
'data_format': self.data_format,
}
self.outputs = {'Output': output}
def test_check_output(self):
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def init_test_case(self):
self.stride = [1, 1, 1]
self.input_size = [2, 3, 4, 4, 4] # NCDHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3, 3]
def init_test_case_2(self):
pass
def init_dilation(self):
self.dilations = [1, 1, 1]
def init_group(self):
self.groups = 1
def init_kernel_type(self):
pass
def init_paddings(self):
self.pad = [0, 0, 0]
self.padding_algorithm = "EXPLICIT"
def init_data_format(self):
self.data_format = "NCDHW"
class TestConv3DOp_AsyPadding(TestConv3DOp_2):
def init_test_case(self):
self.stride = [1, 1, 2]
self.input_size = [2, 3, 4, 4, 4] # NCDHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3, 3]
def init_paddings(self):
self.pad = [1, 0, 1, 0, 0, 2]
self.padding_algorithm = "EXPLICIT"
class TestConv3DOp_DiffDataInDiffDim(TestConv3DOp_2):
def init_test_case(self):
self.stride = [1, 1, 2]
self.input_size = [2, 3, 4, 5, 5] # NCDHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 4, 3]
def init_paddings(self):
self.pad = [1, 0, 1, 0, 0, 2]
self.padding_algorithm = "EXPLICIT"
class TestCase1_AsyPadding(TestConv3DOp_2):
def init_test_case(self):
self.stride = [1, 1, 1]
self.input_size = [2, 3, 4, 4, 4] # NCDHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3, 3]
def init_paddings(self):
self.pad = [0, 0, 1, 0, 0, 2]
self.padding_algorithm = "EXPLICIT"
class TestWithGroup1_AsyPadding(TestConv3DOp_2):
def init_group(self):
self.groups = 3
def init_paddings(self):
self.pad = [1, 1, 1, 0, 0, 2]
self.padding_algorithm = "EXPLICIT"
class TestWithGroup2_AsyPadding(TestConv3DOp_2):
def init_test_case(self):
self.stride = [1, 1, 1]
self.input_size = [2, 3, 4, 4, 4] # NCDHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3, 3]
def init_group(self):
self.groups = 3
def init_paddings(self):
self.pad = [1, 1, 0, 1, 0, 2]
self.padding_algorithm = "EXPLICIT"
class TestWithDilation_AsyPadding(TestConv3DOp_2):
def init_test_case(self):
self.stride = [1, 1, 1]
self.input_size = [2, 3, 6, 6, 6]
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [24, f_c, 2, 2, 2]
def init_dilation(self):
self.dilations = [2, 2, 2]
def init_group(self):
self.groups = 3
def init_paddings(self):
self.pad = [0, 0, 1, 0, 1, 0]
self.padding_algorithm = "EXPLICIT"
# --------- test python API ---------------
class TestConv3DAPI(unittest.TestCase):
def test_api(self):
input_NDHWC = fluid.layers.data(
name="input_NDHWC",
shape=[2, 5, 5, 5, 3],
append_batch_size=False,
dtype="float32",
)
input_NCDHW = fluid.layers.data(
name="input_NCDHW",
shape=[2, 3, 5, 5, 3],
append_batch_size=False,
dtype="float32",
)
fluid.layers.conv3d(
input=input_NDHWC,
num_filters=3,
filter_size=[3, 3, 3],
stride=[1, 1, 1],
padding=0,
dilation=[1, 1, 1],
groups=1,
data_format="NCDHW",
)
fluid.layers.conv3d(
input=input_NCDHW,
num_filters=3,
filter_size=[3, 3, 3],
stride=[1, 1, 1],
padding=[1, 2, 1, 0, 1, 0],
dilation=[1, 1, 1],
groups=1,
data_format="NCDHW",
)
fluid.layers.conv3d(
input=input_NCDHW,
num_filters=3,
filter_size=[3, 3, 3],
stride=[1, 1, 1],
padding=[[0, 0], [0, 0], [1, 1], [1, 1], [1, 1]],
dilation=[1, 1, 1],
groups=1,
data_format="NCDHW",
)
fluid.layers.conv3d(
input=input_NDHWC,
num_filters=3,
filter_size=[3, 3, 3],
stride=[1, 1, 1],
padding=[[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]],
dilation=[1, 1, 1],
groups=1,
data_format="NDHWC",
)
fluid.layers.conv3d(
input=input_NCDHW,
num_filters=3,
filter_size=[3, 3, 3],
stride=[1, 1, 1],
padding="SAME",
dilation=[1, 1, 1],
groups=1,
data_format="NCDHW",
)
fluid.layers.conv3d(
input=input_NCDHW,
num_filters=3,
filter_size=[3, 3, 3],
stride=[1, 1, 1],
padding="VALID",
dilation=[1, 1, 1],
groups=1,
data_format="NCDHW",
)
class TestConv3DAPI_Error(unittest.TestCase):
def test_api(self):
input = fluid.layers.data(
name="input",
shape=[2, 5, 5, 5, 4],
append_batch_size=False,
dtype="float32",
)
# ValueError: cudnn
def run_1():
fluid.layers.conv3d(
input=input,
num_filters=3,
filter_size=3,
stride=1,
padding=0,
dilation=1,
groups=1,
use_cudnn=[0],
data_format="NCDHW",
)
self.assertRaises(ValueError, run_1)
# ValueError: data_format
def run_2():
fluid.layers.conv3d(
input=input,
num_filters=3,
filter_size=[3, 3, 3],
stride=[1, 1, 1],
padding=0,
dilation=[1, 1, 1],
groups=1,
use_cudnn=False,
data_format="NCHWC",
)
self.assertRaises(ValueError, run_2)
# ValueError: padding
def run_3():
fluid.layers.conv3d(
input=input,
num_filters=3,
filter_size=3,
stride=1,
padding="SAMEE",
dilation=1,
groups=1,
use_cudnn=False,
data_format="NCDHW",
)
self.assertRaises(ValueError, run_3)
def run_4():
fluid.layers.conv3d(
input=input,
num_filters=3,
filter_size=3,
stride=1,
padding=[[0, 1], [0, 0], [0, 1], [0, 1], [0, 1]],
dilation=1,
groups=1,
use_cudnn=False,
data_format="NCDHW",
)
self.assertRaises(ValueError, run_4)
def run_5():
fluid.layers.conv3d(
input=input,
num_filters=3,
filter_size=0,
stride=0,
padding=[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1]],
dilation=1,
groups=1,
use_cudnn=False,
data_format="NDHWC",
)
self.assertRaises(ValueError, run_5)
# ValueError: channel dimmention
x = fluid.layers.data(
name="x",
shape=[2, 5, 5, 5, -1],
append_batch_size=False,
dtype="float32",
)
def run_6():
fluid.layers.conv3d(
input=x,
num_filters=3,
filter_size=3,
stride=1,
padding=0,
dilation=1,
groups=1,
use_cudnn=False,
data_format="NDHWC",
)
self.assertRaises(ValueError, run_6)
# ValueError: groups
def run_7():
fluid.layers.conv3d(
input=input,
num_filters=3,
filter_size=3,
stride=1,
padding=0,
dilation=1,
groups=3,
use_cudnn=False,
data_format="NDHWC",
)
self.assertRaises(ValueError, run_7)
# ValueError: filter num
def run_8():
fluid.layers.conv3d(
input=input,
num_filters=0,
filter_size=0,
stride=0,
padding=0,
dilation=0,
groups=1,
use_cudnn=False,
data_format="NDHWC",
)
self.assertRaises(ValueError, run_8)
for stype in ["float32"]:
create_test_class(globals(), XPUTestConv3DOp, stype)
create_test_class(globals(), XPUTestConv3DOp_v2, stype)
if __name__ == '__main__':
unittest.main()
......@@ -58,6 +58,9 @@ class XPUTestMaskedSelectOp(XPUOpTestWrapper):
def test_check_output(self):
self.check_output_with_place(self.place)
def test_check_grad(self):
self.check_grad_with_place(self.place, ['X'], 'Y')
def init(self):
self.shape = (50, 3)
......
......@@ -19,6 +19,8 @@ import sys
sys.path.append("..")
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.op import Operator
from op_test_xpu import XPUOpTest
from xpu.get_test_cover_info import (
......@@ -83,6 +85,77 @@ class TestSGDOpWithLargeInput(unittest.TestCase):
result = exe.run(fluid.default_main_program(), fetch_list=[avg_cost])
class TestSparseSGDOp(unittest.TestCase):
def check_with_place(self, place):
scope = core.Scope()
# create and initialize Grad Variable
height = 10
rows = [0, 4, 7]
self.conf()
grad_selected_rows = scope.var('Grad').get_selected_rows()
grad_selected_rows.set_height(height)
grad_selected_rows.set_rows(rows)
np_array = np.ones((len(rows), self.row_numel)).astype("float32")
np_array[0, 0] = 2.0
np_array[2, 8] = 4.0
grad_tensor = grad_selected_rows.get_tensor()
grad_tensor.set(np_array, place)
# create and initialize Param Variable
param = scope.var('Param').get_tensor()
param_array = np.full((height, self.row_numel), 5.0).astype("float32")
param.set(param_array, place)
# create and initialize LeraningRate Variable
lr = scope.var('LearningRate').get_tensor()
lr_array = np.full((1), 2.0).astype("float32")
lr.set(lr_array, place)
# create and run sgd operator
sgd_op = Operator(
"sgd",
Param='Param',
Grad='Grad',
ParamOut='Param',
LearningRate='LearningRate',
)
sgd_op.run(scope, place)
# get and compare result
result_array = np.array(param)
# rows[0] = 0, 5.0 - 2.0 * 2.0
self.assertAlmostEqual(1.0, result_array[rows[0], 0])
# rows[0] = 0, 5.0 - 2.0 * 1.0
self.assertAlmostEqual(3.0, result_array[rows[0], 2])
# 5.0 - 2.0 * 0.0
self.assertAlmostEqual(5.0, result_array[1, 0])
# rows[1] = 4, 5.0 - 2.0 * 1.0
self.assertAlmostEqual(3.0, result_array[rows[1], 10])
# 5.0 - 2.0 * 0.0
self.assertAlmostEqual(5.0, result_array[5, 8])
# rows[2] = 7, 5.0 - 2.0 * 1.0
self.assertAlmostEqual(3.0, result_array[rows[2], 1])
# rows[2] = 7, 5.0 - 2.0 * 4.0
self.assertAlmostEqual(-3.0, result_array[rows[2], 8])
def test_sparse_sgd(self):
places = [core.XPUPlace(0)]
for place in places:
self.check_with_place(place)
def conf(self):
self.row_numel = 12
class TestSparseSGDOpCase8X(TestSparseSGDOp):
def conf(self):
self.row_numel = 16
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册