Unverified commit 00a269de, authored by ronnywang, committed by GitHub

[NPU] add conv_op_npu and test (#34055)

* add conv_op_npu and test

* add more tests

* clean headers & support fp16

* update
Parent da261732
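For context, a minimal usage sketch (not part of the diff) of what the new kernels enable: a static-graph conv2d executed on an NPU device. It assumes a PaddlePaddle build of this era with NPU support; the shapes mirror the unit test below.

import numpy as np
import paddle

paddle.enable_static()
x = paddle.static.data(name="x", shape=[2, 3, 5, 5], dtype="float32")
y = paddle.static.nn.conv2d(input=x, num_filters=6, filter_size=3)

place = paddle.NPUPlace(0)  # requires an NPU-enabled build and device
exe = paddle.static.Executor(place)
exe.run(paddle.static.default_startup_program())
out, = exe.run(feed={"x": np.random.random([2, 3, 5, 5]).astype("float32")},
               fetch_list=[y])
print(out.shape)  # (2, 6, 3, 3): 3x3 kernel, stride 1, zero padding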
@@ -126,6 +126,169 @@ class DepthwiseConvNPUKernel : public framework::OpKernel<T> {
}
};
template <typename T>
class NPUConvOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& dev_ctx = ctx.template device_context<platform::NPUDeviceContext>();
const Tensor* input = ctx.Input<Tensor>("Input");
auto* filter = ctx.Input<Tensor>("Filter");
auto* output = ctx.Output<Tensor>("Output");
output->mutable_data<T>(ctx.GetPlace());
const std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
int groups = ctx.Attr<int>("groups");
const std::string padding_algorithm =
ctx.Attr<std::string>("padding_algorithm");
const std::string data_format = ctx.Attr<std::string>("data_format");
const bool channel_last = data_format == "NHWC";
// update padding and dilation
auto in_dims = input->dims();
auto filter_dims = filter->dims();
framework::DDim in_data_dims;
framework::DDim filter_data_dims;
if (channel_last) {
in_data_dims = framework::slice_ddim(in_dims, 1, in_dims.size() - 1);
} else {
in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
}
    filter_data_dims = framework::slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
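    // The underlying NPU Conv2D operator takes 4-D strides/dilations; the
    // 2-D attribute values are copied into the H/W slots of the active
    // layout below (positions 1,2 for NHWC; 2,3 for NCHW).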
std::vector<int> strides_vec(4, 1);
std::vector<int> dilations_vec(4, 1);
Tensor input_tensor, output_tensor;
input_tensor.ShareDataWith(*input);
output_tensor.ShareDataWith(*output);
if (channel_last) {
input_tensor.set_layout(DataLayout::kNHWC);
output_tensor.set_layout(DataLayout::kNHWC);
strides_vec[1] = strides[0];
strides_vec[2] = strides[1];
dilations_vec[1] = dilations[0];
dilations_vec[2] = dilations[1];
} else {
strides_vec[2] = strides[0];
strides_vec[3] = strides[1];
dilations_vec[2] = dilations[0];
dilations_vec[3] = dilations[1];
}
const auto& runner =
NpuOpRunner("Conv2D", {input_tensor, *filter}, {output_tensor},
{{"strides", strides_vec},
{"pads", paddings},
{"dilations", dilations_vec},
{"groups", groups},
{"data_format", data_format}});
runner.Run(dev_ctx.stream());
}
};
template <typename T>
class NPUConvGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& dev_ctx = ctx.template device_context<platform::NPUDeviceContext>();
auto input = ctx.Input<Tensor>("Input");
auto filter = ctx.Input<Tensor>("Filter");
auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
auto input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
auto filter_grad = ctx.Output<Tensor>(framework::GradVarName("Filter"));
const std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
int groups = ctx.Attr<int>("groups");
const std::string padding_algorithm =
ctx.Attr<std::string>("padding_algorithm");
const std::string data_format = ctx.Attr<std::string>("data_format");
const bool channel_last = data_format == "NHWC";
// update padding and dilation
auto in_dims = input->dims();
auto filter_dims = filter->dims();
framework::DDim in_data_dims;
framework::DDim filter_data_dims;
if (channel_last) {
in_data_dims = framework::slice_ddim(in_dims, 1, in_dims.size() - 1);
} else {
in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
}
    filter_data_dims = framework::slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
std::vector<int> strides_vec(4, 1);
std::vector<int> dilations_vec(4, 1);
Tensor input_tensor, output_grad_tensor;
input_tensor.ShareDataWith(*input);
output_grad_tensor.ShareDataWith(*output_grad);
if (channel_last) {
input_tensor.set_layout(DataLayout::kNHWC);
output_grad_tensor.set_layout(DataLayout::kNHWC);
strides_vec[1] = strides[0];
strides_vec[2] = strides[1];
dilations_vec[1] = dilations[0];
dilations_vec[2] = dilations[1];
} else {
strides_vec[2] = strides[0];
strides_vec[3] = strides[1];
dilations_vec[2] = dilations[0];
dilations_vec[3] = dilations[1];
}
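    // Each gradient output is optional. Conv2DBackpropFilterD and
    // Conv2DBackpropInputD compute dFilter and dInput from dOutput, taking
    // the static filter/input shapes as attributes.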
if (filter_grad) {
filter_grad->mutable_data<T>(ctx.GetPlace());
std::vector<int> filter_shape_vec =
framework::vectorize<int>(filter->dims());
const auto& runner = NpuOpRunner(
"Conv2DBackpropFilterD", {input_tensor, output_grad_tensor},
{*filter_grad}, {{"filter_size", filter_shape_vec},
{"strides", strides_vec},
{"pads", paddings},
{"dilations", dilations_vec},
{"groups", groups},
{"data_format", data_format}});
runner.Run(dev_ctx.stream());
}
if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace());
std::vector<int> input_shape_vec =
framework::vectorize<int>(input->dims());
Tensor input_grad_tensor;
input_grad_tensor.ShareDataWith(*input_grad);
if (channel_last) {
input_grad_tensor.set_layout(DataLayout::kNHWC);
}
const auto& runner =
NpuOpRunner("Conv2DBackpropInputD", {*filter, output_grad_tensor},
{input_grad_tensor}, {{"input_size", input_shape_vec},
{"strides", strides_vec},
{"pads", paddings},
{"dilations", dilations_vec},
{"groups", groups},
{"data_format", data_format}});
runner.Run(dev_ctx.stream());
}
}
};
} // namespace operators
} // namespace paddle
@@ -135,3 +298,7 @@ REGISTER_OP_NPU_KERNEL(
depthwise_conv2d,
ops::DepthwiseConvNPUKernel<paddle::platform::NPUDeviceContext,
paddle::platform::float16>);
REGISTER_OP_NPU_KERNEL(conv2d, ops::NPUConvOpKernel<float>,
ops::NPUConvOpKernel<paddle::platform::float16>);
REGISTER_OP_NPU_KERNEL(conv2d_grad, ops::NPUConvGradOpKernel<float>,
ops::NPUConvGradOpKernel<paddle::platform::float16>);
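To make the layout handling above concrete: the kernels expand the 2-D strides/dilations attributes into the 4-D vectors the NPU Conv2D operator expects, writing the values into the H/W positions of the active layout. A small illustrative sketch of that mapping (the helper name is ours, not part of the commit):

def expand_to_4d(strides, dilations, channel_last):
    # Mirrors strides_vec/dilations_vec in NPUConvOpKernel: H/W occupy
    # positions 1,2 for NHWC and 2,3 for NCHW.
    strides_vec, dilations_vec = [1, 1, 1, 1], [1, 1, 1, 1]
    h, w = (1, 2) if channel_last else (2, 3)
    strides_vec[h], strides_vec[w] = strides
    dilations_vec[h], dilations_vec[w] = dilations
    return strides_vec, dilations_vec

assert expand_to_4d([2, 2], [2, 2], channel_last=False) == ([1, 1, 2, 2], [1, 1, 2, 2])

The unit test file added by this commit follows.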
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import sys
sys.path.append("..")
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from op_test import OpTest
from test_conv2d_op import conv2d_forward_naive
paddle.enable_static()
def create_test_channel_last_class(parent):
class TestChannelLastCase(parent):
def init_data_format(self):
self.data_format = "NHWC"
def init_test_case_2(self):
N, C, H, W = self.input_size
self.input_size = [N, H, W, C]
cls_name = "{0}_{1}".format(parent.__name__, "ChannelLast")
TestChannelLastCase.__name__ = cls_name
globals()[cls_name] = TestChannelLastCase
def create_test_padding_SAME_class(parent):
    class TestPaddingSAMECase(parent):
def init_paddings(self):
self.pad = [0, 0]
self.padding_algorithm = "SAME"
cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp")
    TestPaddingSAMECase.__name__ = cls_name
    globals()[cls_name] = TestPaddingSAMECase
def create_test_padding_VALID_class(parent):
class TestPaddingVALIDCase(parent):
def init_paddings(self):
self.pad = [1, 1]
self.padding_algorithm = "VALID"
cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp")
TestPaddingVALIDCase.__name__ = cls_name
globals()[cls_name] = TestPaddingVALIDCase
def create_test_fp16_class(parent):
class TestFp16Case(parent):
def init_dtype(self):
self.dtype = np.float16
cls_name = "{0}_{1}".format(parent.__name__, "Fp16")
TestFp16Case.__name__ = cls_name
globals()[cls_name] = TestFp16Case
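# NOTE: the factories above generate test variants at import time: e.g.
# create_test_fp16_class(TestFoo) registers a subclass named TestFoo_Fp16 in
# this module's globals so unittest discovery runs it alongside the original.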
class TestConv2DOp(OpTest):
def set_npu(self):
self.__class__.use_npu = True
def init_dtype(self):
self.dtype = np.float32
def init_data_format(self):
self.data_format = "NCHW"
def setUp(self):
self.set_npu()
self.op_type = "conv2d"
self.init_data_format()
self.init_dtype()
self.init_group()
self.init_dilation()
self.init_test_case()
conv2d_param = {
'stride': self.stride,
'pad': self.pad,
'dilation': self.dilations
}
input = np.random.random(self.input_size).astype(self.dtype)
filter = np.random.uniform(-1, 1, self.filter_size).astype(self.dtype)
output, _, _, _, _ = conv2d_forward_naive(
input,
filter,
self.groups,
conv2d_param,
data_format=self.data_format)
output = output.astype(self.dtype)
self.inputs = {
'Input': OpTest.np_dtype_to_fluid_dtype(input),
'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'groups': self.groups,
'dilations': self.dilations,
'data_format': self.data_format,
}
self.outputs = {'Output': output}
def test_check_output(self):
self.check_output_with_place(fluid.NPUPlace(0), atol=1e-2)
def test_check_grad(self):
self.check_grad_with_place(
fluid.NPUPlace(0), {'Input', 'Filter'},
'Output',
max_relative_error=0.03)
def test_check_grad_no_filter(self):
self.check_grad_with_place(
fluid.NPUPlace(0), ['Input'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
self.check_grad_with_place(
fluid.NPUPlace(0), ['Filter'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Input']))
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
def init_dilation(self):
self.dilations = [1, 1]
def init_group(self):
self.groups = 1
class TestWithPad(TestConv2DOp):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
class TestWithStride(TestConv2DOp):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [2, 2]
self.input_size = [2, 3, 6, 6] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
class TestWithGroup(TestConv2DOp):
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
        self.groups = 3
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [18, f_c, 3, 3]
class TestWith1x1(TestConv2DOp):
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [120, f_c, 1, 1]
def init_group(self):
        # FIXME: support groups = 3 in this case.
        # NOTE(wangran16): there is an unknown error (ACL error code 507015)
        # when groups = 3, which needs to be fixed.
self.groups = 1
class TestWithDepthWise5x5(TestConv2DOp):
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [2, 4, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [8, f_c, 5, 5]
def init_group(self):
self.groups = 4
class TestWithDepthWise7x7(TestConv2DOp):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [2, 2]
self.input_size = [2, 8, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [16, f_c, 7, 7]
def init_group(self):
self.groups = 8
class TestWithDilation(TestConv2DOp):
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [2, 3, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [12, f_c, 3, 3]
def init_dilation(self):
self.dilations = [2, 2]
def init_group(self):
self.groups = 3
class TestWithInput1x1Filter1x1(TestConv2DOp):
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [100, 1, 1, 1] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [120, f_c, 1, 1]
def init_group(self):
self.groups = 1
class TestConv2DOp_v2(OpTest):
def set_npu(self):
self.__class__.use_npu = True
def setUp(self):
self.set_npu()
self.op_type = "conv2d"
self.dtype = np.float32
self.init_kernel_type()
self.init_group()
self.init_dilation()
self.init_data_format()
self.init_test_case()
self.init_paddings()
self.init_test_case_2()
conv2d_param = {
'stride': self.stride,
'pad': self.pad,
'dilation': self.dilations
}
input = np.random.random(self.input_size).astype(self.dtype)
filter = np.random.uniform(-1, 1, self.filter_size).astype(self.dtype)
output, _, _, _, _ = conv2d_forward_naive(
input, filter, self.groups, conv2d_param, self.padding_algorithm,
self.data_format)
output = output.astype(self.dtype)
self.inputs = {
'Input': OpTest.np_dtype_to_fluid_dtype(input),
'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'padding_algorithm': self.padding_algorithm,
'groups': self.groups,
'dilations': self.dilations,
'data_format': self.data_format,
}
self.outputs = {'Output': output}
def test_check_output(self):
self.check_output_with_place(paddle.NPUPlace(0), atol=1e-2)
def test_check_grad(self):
self.check_grad_with_place(
paddle.NPUPlace(0), {'Input', 'Filter'},
'Output',
max_relative_error=0.02)
def test_check_grad_no_filter(self):
self.check_grad_with_place(
paddle.NPUPlace(0), ['Input'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
self.check_grad_with_place(
paddle.NPUPlace(0), ['Filter'],
'Output',
no_grad_set=set(['Input']))
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 2]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 4, 3]
def init_dilation(self):
self.dilations = [1, 1]
def init_group(self):
self.groups = 1
def init_kernel_type(self):
pass
def init_paddings(self):
self.pad = [0, 0]
self.padding_algorithm = "EXPLICIT"
def init_data_format(self):
self.data_format = "NCHW"
def init_test_case_2(self):
pass
class TestConv2DOp_AsyPadding(TestConv2DOp_v2):
def init_paddings(self):
self.pad = [0, 0, 1, 2]
self.padding_algorithm = "EXPLICIT"
class TestWithPad_AsyPadding(TestConv2DOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
def init_paddings(self):
self.pad = [2, 1, 3, 2]
self.padding_algorithm = "EXPLICIT"
class TestWithStride_AsyPadding(TestConv2DOp_v2):
def init_test_case(self):
self.stride = [2, 2]
self.input_size = [2, 3, 6, 6] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
def init_paddings(self):
self.pad = [2, 1, 3, 2]
self.padding_algorithm = "EXPLICIT"
class TestWithGroup_AsyPadding(TestConv2DOp_v2):
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 2]
self.input_size = [2, 3, 5, 5] # NCHW
        self.groups = 3
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [24, f_c, 4, 3]
class TestWith1x1_AsyPadding(TestConv2DOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [120, f_c, 1, 1]
def init_group(self):
self.groups = 1
def init_paddings(self):
self.pad = [2, 2, 4, 0]
self.padding_algorithm = "EXPLICIT"
class TestWithDepthWise3x3_AsyPadding(TestConv2DOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [3, 4, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [16, f_c, 3, 3]
def init_dilation(self):
self.dilations = [2, 2]
def init_group(self):
self.groups = 4
def init_paddings(self):
self.pad = [1, 3, 2, 1]
self.padding_algorithm = "EXPLICIT"
class TestWithDepthWise5x5_AsyPadding(TestConv2DOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 4, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [8, f_c, 5, 5]
def init_group(self):
self.groups = 4
def init_paddings(self):
self.pad = [0, 1, 1, 0]
self.padding_algorithm = "EXPLICIT"
class TestWithDepthWise7x7_AsyPadding(TestConv2DOp_v2):
def init_test_case(self):
self.stride = [2, 2]
self.input_size = [2, 8, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [16, f_c, 7, 7]
def init_group(self):
self.groups = 8
def init_paddings(self):
self.pad = [1, 3, 4, 1]
self.padding_algorithm = "EXPLICIT"
class TestWithDilation_AsyPadding(TestConv2DOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [2, 3, 10, 10] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [24, f_c, 3, 3]
def init_dilation(self):
self.dilations = [2, 2]
def init_group(self):
self.groups = 3
def init_paddings(self):
self.pad = [0, 1, 3, 0]
self.padding_algorithm = "EXPLICIT"
class TestWithInput1x1Filter1x1_AsyPadding(TestConv2DOp_v2):
def init_test_case(self):
self.stride = [1, 1]
self.input_size = [100, 1, 1, 1] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [120, f_c, 1, 1]
def init_group(self):
self.groups = 1
def init_paddings(self):
self.pad = [0, 3, 4, 0]
self.padding_algorithm = "EXPLICIT"
create_test_padding_SAME_class(TestConv2DOp_AsyPadding)
create_test_padding_SAME_class(TestWithPad_AsyPadding)
create_test_padding_SAME_class(TestWithStride_AsyPadding)
create_test_padding_SAME_class(TestWithGroup_AsyPadding)
create_test_padding_SAME_class(TestWithInput1x1Filter1x1_AsyPadding)
create_test_padding_VALID_class(TestConv2DOp_AsyPadding)
create_test_padding_VALID_class(TestWithPad_AsyPadding)
create_test_padding_VALID_class(TestWithStride_AsyPadding)
create_test_padding_VALID_class(TestWithGroup_AsyPadding)
create_test_padding_VALID_class(TestWithInput1x1Filter1x1_AsyPadding)
create_test_channel_last_class(TestConv2DOp_AsyPadding)
create_test_channel_last_class(TestWithPad_AsyPadding)
create_test_channel_last_class(TestWithGroup_AsyPadding)
create_test_channel_last_class(TestWith1x1_AsyPadding)
create_test_channel_last_class(TestWithInput1x1Filter1x1_AsyPadding)
create_test_fp16_class(TestConv2DOp_AsyPadding)
create_test_fp16_class(TestWithPad_AsyPadding)
create_test_fp16_class(TestWithStride_AsyPadding)
create_test_fp16_class(TestWithGroup_AsyPadding)
create_test_fp16_class(TestWithInput1x1Filter1x1_AsyPadding)
if __name__ == "__main__":
unittest.main()