From 5235ec53fd9f224c358ee8ffe4b71d1cc4fc4853 Mon Sep 17 00:00:00 2001
From: ronnywang
Date: Thu, 1 Sep 2022 17:19:55 +0800
Subject: [PATCH] [NPU] add npu ops: instance_norm, conv3d_transpose (#45636)

* [NPU] add conv3d_transpose, instance_norm, instance_norm_grad
* add ut
* remove instance_norm_grad
---
 .../fluid/operators/conv_transpose_op_npu.cc  | 104 +++++++
 .../fluid/operators/instance_norm_op_npu.cc   |  93 +++++++
 .../npu/test_conv3d_transpose_op_npu.py       | 258 ++++++++++++++++++
 .../npu/test_instance_norm_op_npu.py          |  90 ++++++
 4 files changed, 545 insertions(+)
 create mode 100644 paddle/fluid/operators/instance_norm_op_npu.cc
 create mode 100644 python/paddle/fluid/tests/unittests/npu/test_conv3d_transpose_op_npu.py
 create mode 100644 python/paddle/fluid/tests/unittests/npu/test_instance_norm_op_npu.py

diff --git a/paddle/fluid/operators/conv_transpose_op_npu.cc b/paddle/fluid/operators/conv_transpose_op_npu.cc
index 181ecf2ca5..94a6825ff6 100644
--- a/paddle/fluid/operators/conv_transpose_op_npu.cc
+++ b/paddle/fluid/operators/conv_transpose_op_npu.cc
@@ -199,6 +199,106 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel<T> {
   }
 };
 
+template <typename T>
+class Conv3DTransposeNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    const Tensor* input = ctx.Input<Tensor>("Input");
+    const Tensor* filter = ctx.Input<Tensor>("Filter");
+    Tensor* output = ctx.Output<Tensor>("Output");
+    output->mutable_data<T>(ctx.GetPlace());
+    std::vector<int> output_padding =
+        ctx.Attr<std::vector<int>>("output_padding");
+    const std::vector<int> stride = ctx.Attr<std::vector<int>>("strides");
+    std::vector<int> padding = ctx.Attr<std::vector<int>>("paddings");
+    std::vector<int> dilation = ctx.Attr<std::vector<int>>("dilations");
+    std::string data_format = ctx.Attr<std::string>("data_format");
+    int groups = ctx.Attr<int>("groups");
+    const std::string padding_algorithm =
+        ctx.Attr<std::string>("padding_algorithm");
+
+    // check dimension
+    const bool channel_last = data_format == "NHWC";
+
+    if (data_format == "NHWC") {
+      data_format = "NDHWC";
+    } else {
+      data_format = "NCDHW";
+    }
+
+    // update padding and dilation
+    auto in_dims = input->dims();
+    auto filter_dims = filter->dims();
+    framework::DDim in_data_dims;
+    framework::DDim filter_data_dims;
+
+    if (channel_last) {
+      in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1);
+    } else {
+      in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size());
+    }
+    filter_data_dims = phi::slice_ddim(filter_dims, 2, in_dims.size());
+
+    std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
+    phi::UpdatePaddingAndDilation(
+        &padding, &dilation, padding_algorithm, in_data_dims, stride, ksize);
+
+    // construct NPU attr
+    std::vector<int> strides(5, 1);
+    std::vector<int> dilations(5, 1);
+
+    Tensor input_tensor, output_tensor, filter_tensor;
+    input_tensor.Resize(input->dims());
+    input_tensor.ShareDataWith(*input);
+    output_tensor.Resize(output->dims());
+    output_tensor.ShareDataWith(*output);
+    filter_tensor.Resize(filter->dims());
+    filter_tensor.ShareDataWith(*filter);
+
+    PADDLE_ENFORCE_EQ(
+        dilation[0],
+        1,
+        platform::errors::InvalidArgument(
+            "dilation[0] must be equal to 1, but received %d.", dilation[0]));
+
+    if (channel_last) {
+      input_tensor.set_layout(DataLayout::kNDHWC);
+      output_tensor.set_layout(DataLayout::kNDHWC);
+      strides[1] = stride[0];
+      strides[2] = stride[1];
+      strides[3] = stride[2];
+      dilations[2] = dilation[1];
+      dilations[3] = dilation[2];
+    } else {
+      input_tensor.set_layout(DataLayout::kNCDHW);
+      output_tensor.set_layout(DataLayout::kNCDHW);
+      strides[2] = stride[0];
+      strides[3] = stride[1];
+      strides[4] = stride[2];
+      dilations[3] = dilation[1];
+      dilations[4] = dilation[2];
+    }
+    filter_tensor.set_layout(DataLayout::kNCDHW);
+
+    auto output_dim_vec = phi::vectorize(output_tensor.dims());
+
+    auto& dev_ctx = ctx.template device_context<platform::NPUDeviceContext>();
+
+    NpuOpRunner runner;
+    runner.SetType("Conv3DBackpropInputD")
+        .AddInput(filter_tensor)
+        .AddInput(input_tensor)
+        .AddAttr("input_size", output_dim_vec)
+        .AddAttr("strides", strides)
+        .AddAttr("pads", padding)
+        .AddAttr("dilations", dilations)
+        .AddAttr("groups", groups)
+        .AddAttr("data_format", data_format)
+        .AddOutput(output_tensor);
+    runner.Run(dev_ctx.stream());
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -212,3 +312,7 @@ REGISTER_OP_NPU_KERNEL(conv2d_transpose,
 REGISTER_OP_NPU_KERNEL(conv2d_transpose_grad,
                        ops::Conv2DTransposeGradNPUKernel<float>,
                        ops::Conv2DTransposeGradNPUKernel<plat::float16>);
+
+REGISTER_OP_NPU_KERNEL(conv3d_transpose,
+                       ops::Conv3DTransposeNPUKernel<float>,
+                       ops::Conv3DTransposeNPUKernel<plat::float16>);
diff --git a/paddle/fluid/operators/instance_norm_op_npu.cc b/paddle/fluid/operators/instance_norm_op_npu.cc
new file mode 100644
index 0000000000..89c6a310d7
--- /dev/null
+++ b/paddle/fluid/operators/instance_norm_op_npu.cc
@@ -0,0 +1,93 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+namespace paddle {
+namespace operators {
+using Tensor = framework::Tensor;
+
+template <typename DeviceContext, typename T>
+class InstanceNormNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    const auto epsilon = ctx.Attr<float>("epsilon");
+    const auto* x = ctx.Input<Tensor>("X");
+    const auto* scale = ctx.Input<Tensor>("Scale");
+    const auto* bias = ctx.Input<Tensor>("Bias");
+    auto* y = ctx.Output<Tensor>("Y");
+    auto* mean = ctx.Output<Tensor>("SavedMean");
+    auto* variance = ctx.Output<Tensor>("SavedVariance");
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+
+    dev_ctx.template Alloc<T>(y);
+    dev_ctx.template Alloc<T>(mean);
+    dev_ctx.template Alloc<T>(variance);
+
+    auto x_dims = x->dims();
+    auto y_dims = y->dims();
+
+    PADDLE_ENFORCE(x_dims.size() <= 5 && x_dims.size() >= 3,
+                   platform::errors::InvalidArgument(
+                       "InstanceNorm only supports input with dimension "
+                       "greater than or equal to 3 and less than or equal "
+                       "to 5, but the dimension of input is %d.",
+                       x_dims.size()));
+
+    auto tmp_x_dims = phi::vectorize(x_dims);
+    auto tmp_y_dims = phi::vectorize(y_dims);
+    if (x_dims.size() < 5) {
+      for (size_t i = x_dims.size(); i < 5; ++i) {
+        tmp_x_dims.insert(tmp_x_dims.begin() + 2, 1);
+        tmp_y_dims.insert(tmp_y_dims.begin() + 2, 1);
+      }
+    }
+
+    Tensor tmp_x, tmp_y;
+    tmp_x.ShareDataWith(*x);
+
+    tmp_x.Resize(phi::make_ddim(tmp_x_dims));
+    tmp_x.set_layout(paddle::framework::DataLayout::NCDHW);
+    tmp_y.ShareDataWith(*y);
+    tmp_y.Resize(phi::make_ddim(tmp_y_dims));
+    tmp_y.set_layout(paddle::framework::DataLayout::NCDHW);
+
+    NpuOpRunner runner;
+
+    runner.SetType("InstanceNorm")
+        .AddInput(tmp_x)
+        .AddInput(*scale)
+        .AddInput(*bias)
+        .AddAttr("data_format", std::string("NCDHW"))
+        .AddAttr("epsilon", epsilon)
+        .AddOutput(tmp_y)
+        .AddOutput(*mean)
+        .AddOutput(*variance);
+    runner.Run(dev_ctx.stream());
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_NPU_KERNEL(
+    instance_norm,
+    ops::InstanceNormNPUKernel<plat::NPUDeviceContext, float>,
+    ops::InstanceNormNPUKernel<plat::NPUDeviceContext, plat::float16>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_conv3d_transpose_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_conv3d_transpose_op_npu.py
new file mode 100644
index 0000000000..0e5710e2a2
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_conv3d_transpose_op_npu.py
@@ -0,0 +1,258 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+
+sys.path.append("..")
+from op_test import OpTest
+
+import paddle
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+
+paddle.enable_static()
+
+
+def conv3dtranspose_forward_naive(input_, filter_, attrs):
+    padding_algorithm = attrs['padding_algorithm']
+    if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]:
+        raise ValueError("Unknown Attr(padding_algorithm): '%s'. "
+                         "It can only be 'SAME', 'VALID' or 'EXPLICIT'." %
+                         str(padding_algorithm))
+
+    if attrs['data_format'] == 'NHWC':
+        input_ = np.transpose(input_, [0, 4, 1, 2, 3])
+    in_n, in_c, in_d, in_h, in_w = input_.shape
+    f_c, f_out_c, f_d, f_h, f_w = filter_.shape
+    groups = attrs['groups']
+    assert in_c == f_c
+    out_c = f_out_c * groups
+    sub_in_c = in_c // groups
+
+    stride, pad, dilations = attrs['strides'], attrs['paddings'], attrs[
+        'dilations']
+
+    def _get_padding_with_SAME(input_shape, kernel_size, kernel_stride):
+        padding = []
+        for input_size, filter_size, stride_size in zip(input_shape,
+                                                        kernel_size,
+                                                        kernel_stride):
+            out_size = int((input_size + stride_size - 1) / stride_size)
+            pad_sum = np.max(
+                ((out_size - 1) * stride_size + filter_size - input_size, 0))
+            pad_0 = int(pad_sum / 2)
+            pad_1 = int(pad_sum - pad_0)
+            padding.append(pad_0)
+            padding.append(pad_1)
+        return padding
+
+    ksize = filter_.shape[2:5]
+    if padding_algorithm == "VALID":
+        pad = [0, 0, 0, 0, 0, 0]
+    elif padding_algorithm == "SAME":
+        dilations = [1, 1, 1]
+        input_data_shape = input_.shape[2:5]
+        pad = _get_padding_with_SAME(input_data_shape, ksize, stride)
+
+    pad_d_0, pad_d_1 = pad[0], pad[0]
+    pad_h_0, pad_h_1 = pad[1], pad[1]
+    pad_w_0, pad_w_1 = pad[2], pad[2]
+    if len(pad) == 6:
+        pad_d_0, pad_d_1 = pad[0], pad[1]
+        pad_h_0, pad_h_1 = pad[2], pad[3]
+        pad_w_0, pad_w_1 = pad[4], pad[5]
+
+    d_block_d = dilations[0] * (f_d - 1) + 1
+    d_block_h = dilations[1] * (f_h - 1) + 1
+    d_block_w = dilations[2] * (f_w - 1) + 1
+    out_d = (in_d - 1) * stride[0] + d_block_d
+    out_h = (in_h - 1) * stride[1] + d_block_h
+    out_w = (in_w - 1) * stride[2] + d_block_w
+    out = np.zeros((in_n, out_c, out_d, out_h, out_w))
+
+    for n in range(in_n):
+        for d in range(in_d):
+            for i in range(in_h):
+                for j in range(in_w):
+                    for g in range(groups):
+                        input_masked = input_[n,
+                                              g * sub_in_c:(g + 1) * sub_in_c,
+                                              d, i, j]  # (c)
+                        input_masked = np.reshape(input_masked,
+                                                  (sub_in_c, 1, 1, 1))
+                        input_masked = np.tile(input_masked, (1, f_d, f_h, f_w))
+
+                        for k in range(f_out_c):
+                            tmp_out = np.sum(input_masked *
+                                             filter_[g * sub_in_c:(g + 1) *
+                                                     sub_in_c, k, :, :, :],
+                                             axis=0)
+                            d1, d2 = d * stride[0], d * stride[0] + d_block_d
+                            i1, i2 = i * stride[1], i * stride[1] + d_block_h
+                            j1, j2 = j * stride[2], j * stride[2] + d_block_w
+                            out[n, g * f_out_c + k, d1:d2:dilations[0],
+                                i1:i2:dilations[1],
+                                j1:j2:dilations[2]] += tmp_out
+
+    out = out[:, :, pad_d_0:out_d - pad_d_1, pad_h_0:out_h - pad_h_1,
+              pad_w_0:out_w - pad_w_1]
+    if attrs['data_format'] == 'NHWC':
+        out = np.transpose(out, [0, 2, 3, 4, 1])
+    return out
+
+
+class TestConv3DTransposeOp(OpTest):
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def setUp(self):
+        # init as conv transpose
+        self.check_no_input = False
+        self.check_no_filter = False
+        self.data_format = 'NCHW'
+        self.pad = [0, 0, 0]
+        self.padding_algorithm = "EXPLICIT"
+        self.init_op_type()
+        self.init_test_case()
+        self.set_npu()
+
+        input_ = np.random.random(self.input_size).astype("float32")
+        filter_ = np.random.random(self.filter_size).astype("float32")
+
+        self.inputs = {'Input': input_, 'Filter': filter_}
+        self.attrs = {
+            'strides': self.stride,
+            'paddings': self.pad,
+            'padding_algorithm': self.padding_algorithm,
+            'dilations': self.dilations,
+            'groups': self.groups,
+            'data_format': self.data_format
+        }
+
+        output = conv3dtranspose_forward_naive(input_, filter_,
+                                               self.attrs).astype("float32")
+
+        self.outputs = {'Output': output}
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place, atol=8e-3)
+
+    def init_test_case(self):
+        self.pad = [0, 0, 0]
+        self.stride = [1, 1, 1]
+        self.dilations = [1, 1, 1]
+        self.groups = 1
+        self.input_size = [1, 1, 5, 5, 5]  # NCDHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 1, 3, 3, 3]
+
+    def init_op_type(self):
+        self.op_type = "conv3d_transpose"
+
+
+class TestWithSymmetricPad(TestConv3DTransposeOp):
+
+    def init_test_case(self):
+        self.check_no_input = True
+        self.pad = [1, 1, 1]
+        self.stride = [1, 1, 1]
+        self.dilations = [1, 1, 1]
+        self.groups = 1
+        self.input_size = [1, 2, 5, 5, 5]  # NCDHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3, 3]
+
+
+class TestWithAsymmetricPad(TestConv3DTransposeOp):
+
+    def init_test_case(self):
+        self.pad = [1, 0, 1, 0, 1, 2]
+        self.stride = [1, 1, 1]
+        self.dilations = [1, 1, 1]
+        self.groups = 1
+        self.input_size = [1, 2, 5, 5, 5]  # NCDHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3, 3]
+
+
+class TestWithSAMEPad(TestConv3DTransposeOp):
+
+    def init_test_case(self):
+        self.stride = [1, 1, 2]
+        self.dilations = [1, 2, 1]
+        self.groups = 1
+        self.input_size = [1, 2, 5, 5, 6]  # NCDHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3, 4]
+        self.padding_algorithm = 'SAME'
+
+
+class TestWithVALIDPad(TestConv3DTransposeOp):
+
+    def init_test_case(self):
+        self.stride = [2, 1, 1]
+        self.dilations = [1, 1, 1]
+        self.groups = 1
+        self.input_size = [1, 2, 5, 5, 5]  # NCDHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 4, 3]
+        self.padding_algorithm = 'VALID'
+
+
+class TestWithStride(TestConv3DTransposeOp):
+
+    def init_test_case(self):
+        self.check_no_filter = True
+        self.pad = [1, 1, 1]
+        self.stride = [2, 2, 2]
+        self.dilations = [1, 1, 1]
+        self.groups = 1
+        self.input_size = [1, 2, 5, 5, 5]  # NCDHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3, 3]
+
+
+class TestWithDilation(TestConv3DTransposeOp):
+
+    def init_test_case(self):
+        self.pad = [1, 1, 1]
+        self.stride = [1, 1, 1]
+        self.dilations = [1, 2, 2]
+        self.groups = 1
+        self.input_size = [1, 2, 5, 5, 5]  # NCDHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3, 3]
+
+
+class Test_NHWC(TestConv3DTransposeOp):
+
+    def init_test_case(self):
+        self.pad = [0, 0, 0]
+        self.stride = [1, 1, 1]
+        self.dilations = [1, 1, 1]
+        self.groups = 1
+        self.input_size = [1, 5, 5, 5, 2]  # NDHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3, 3]
+        self.data_format = 'NHWC'
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/npu/test_instance_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_instance_norm_op_npu.py
new file mode 100644
index 0000000000..c7363194e9
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_instance_norm_op_npu.py
@@ -0,0 +1,90 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+
+sys.path.append("..")
+from op_test import OpTest
+
+import paddle
+from paddle import fluid
+from paddle.static import Program, program_guard
+from paddle.fluid import core
+from paddle.fluid.op import Operator
+from paddle.fluid.dygraph import to_variable
+from paddle.fluid.framework import _test_eager_guard
+
+paddle.enable_static()
+
+
+class TestInstanceNorm(unittest.TestCase):
+
+    def test_dygraph(self):
+        places = [fluid.NPUPlace(0)]
+        for p in places:
+            shape = [4, 10, 4, 4]
+
+            def compute_v1(x):
+                with fluid.dygraph.guard(p):
+                    bn = fluid.dygraph.InstanceNorm(shape[1])
+                    y = bn(fluid.dygraph.to_variable(x))
+                return y.numpy()
+
+            def compute_v2(x):
+                with fluid.dygraph.guard(p):
+                    bn = paddle.nn.InstanceNorm2D(shape[1])
+                    y = bn(fluid.dygraph.to_variable(x))
+                return y.numpy()
+
+            x = np.random.randn(*shape).astype("float32")
+            y1 = compute_v1(x)
+            y2 = compute_v2(x)
+            np.testing.assert_allclose(y1, y2, rtol=1e-03)
+
+    def test_static(self):
+        places = [fluid.NPUPlace(0)]
+        for p in places:
+            exe = fluid.Executor(p)
+            shape = [4, 10, 16, 16]
+
+            def compute_v1(x_np):
+                with program_guard(Program(), Program()):
+                    ins = fluid.dygraph.InstanceNorm(shape[1])
+                    x = fluid.data(name='x', shape=x_np.shape, dtype=x_np.dtype)
+                    y = ins(x)
+                    exe.run(fluid.default_startup_program())
+                    r = exe.run(feed={'x': x_np}, fetch_list=[y])[0]
+                return r
+
+            def compute_v2(x_np):
+                with program_guard(Program(), Program()):
+                    ins = paddle.nn.InstanceNorm2D(shape[1])
+                    x = fluid.data(name='x', shape=x_np.shape, dtype=x_np.dtype)
+                    y = ins(x)
+                    exe.run(fluid.default_startup_program())
+                    r = exe.run(feed={'x': x_np}, fetch_list=[y])[0]
+                return r
+
+            x = np.random.randn(*shape).astype("float32")
+            y1 = compute_v1(x)
+            y2 = compute_v2(x)
+            np.testing.assert_allclose(y1, y2, rtol=1e-03)
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab
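
Usage note (not part of the patch): a minimal static-graph sketch of how the newly registered conv3d_transpose NPU kernel could be exercised end to end. It mirrors the NCDHW shapes used in TestConv3DTransposeOp above; paddle.NPUPlace(0) and an Ascend-enabled Paddle build are assumptions, and paddle.nn.functional.conv3d_transpose is used here only as a convenient front end for the conv3d_transpose op.

import numpy as np
import paddle

paddle.enable_static()

main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    # NCDHW input and [in_c, out_c, kD, kH, kW] filter, as in the unit tests.
    x = paddle.static.data(name='x', shape=[1, 2, 5, 5, 5], dtype='float32')
    w = paddle.static.data(name='w', shape=[2, 6, 3, 3, 3], dtype='float32')
    y = paddle.nn.functional.conv3d_transpose(x, w, stride=1, padding=0)

exe = paddle.static.Executor(paddle.NPUPlace(0))  # assumes an NPU build of Paddle
exe.run(startup_prog)
out, = exe.run(main_prog,
               feed={
                   'x': np.random.rand(1, 2, 5, 5, 5).astype('float32'),
                   'w': np.random.rand(2, 6, 3, 3, 3).astype('float32')
               },
               fetch_list=[y])
print(out.shape)  # (1, 6, 7, 7, 7): (in - 1) * stride + kernel per spatial dim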
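
Similarly, a small dygraph sketch for the new instance_norm kernel: it cross-checks the NPU result against the CPU result with paddle.nn.InstanceNorm3D on a 5-D NCDHW input, the layout the kernel pads to internally. The 'npu:0' device string and the tolerances are assumptions.

import numpy as np
import paddle

x = np.random.randn(2, 3, 4, 8, 8).astype('float32')  # NCDHW, 5-D input

paddle.disable_static()
paddle.set_device('cpu')
ref = paddle.nn.InstanceNorm3D(3)(paddle.to_tensor(x)).numpy()

paddle.set_device('npu:0')  # assumes an Ascend NPU build exposing 'npu' devices
out = paddle.nn.InstanceNorm3D(3)(paddle.to_tensor(x)).numpy()

np.testing.assert_allclose(out, ref, rtol=1e-3, atol=1e-3)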