Unverified · Commit 5235ec53 authored by ronnywang, committed by GitHub

[NPU] add npu ops: instance_norm, conv3d_transpose (#45636)

* [NPU] add conv3d_transpose, instance_norm, instance_norm_grad

* add ut

* remove instance_norm_grad
Parent ed2ad5d9
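For context, here is a minimal end-to-end sketch of the two ops this change wires up on NPU. It is illustrative only and not part of the diff; it assumes a Paddle build compiled with NPU support, and the device string 'npu:0' plus the public entry points paddle.nn.functional.conv3d_transpose and paddle.nn.InstanceNorm3D are assumptions about how the new kernels are reached.

import numpy as np
import paddle

# Assumption: this Paddle build has NPU support and exposes the 'npu:0' device.
paddle.set_device('npu:0')

x = paddle.to_tensor(np.random.randn(1, 2, 5, 5, 5).astype('float32'))  # NCDHW
w = paddle.to_tensor(np.random.randn(2, 6, 3, 3, 3).astype('float32'))  # [in_c, out_c, kD, kH, kW]

# Should dispatch to the Conv3DTransposeNPUKernel added in this commit.
y = paddle.nn.functional.conv3d_transpose(x, w, stride=1, padding=0)
print(y.shape)  # [1, 6, 7, 7, 7]

# Should dispatch to the InstanceNormNPUKernel added in this commit.
inorm = paddle.nn.InstanceNorm3D(num_features=6)
print(inorm(y).shape)  # [1, 6, 7, 7, 7]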
@@ -199,6 +199,106 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel<T> {
}
};
template <typename T>
class Conv3DTransposeNPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const Tensor* input = ctx.Input<Tensor>("Input");
const Tensor* filter = ctx.Input<Tensor>("Filter");
Tensor* output = ctx.Output<Tensor>("Output");
output->mutable_data<T>(ctx.GetPlace());
std::vector<int> output_padding =
ctx.Attr<std::vector<int>>("output_padding");
const std::vector<int> stride = ctx.Attr<std::vector<int>>("strides");
std::vector<int> padding = ctx.Attr<std::vector<int>>("paddings");
std::vector<int> dilation = ctx.Attr<std::vector<int>>("dilations");
std::string data_format = ctx.Attr<std::string>("data_format");
int groups = ctx.Attr<int>("groups");
const std::string padding_algorithm =
ctx.Attr<std::string>("padding_algorithm");
// resolve the data format and map it to its 5-D name
const bool channel_last = data_format == "NHWC";
if (data_format == "NHWC") {
data_format = "NDHWC";
} else {
data_format = "NCDHW";
}
// update padding and dilation
auto in_dims = input->dims();
auto filter_dims = filter->dims();
framework::DDim in_data_dims;
framework::DDim filter_data_dims;
if (channel_last) {
in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1);
} else {
in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size());
}
filter_data_dims = phi::slice_ddim(filter_dims, 2, in_dims.size());
std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
phi::UpdatePaddingAndDilation(
&padding, &dilation, padding_algorithm, in_data_dims, stride, ksize);
// construct NPU attr
std::vector<int> strides(5, 1);
std::vector<int> dilations(5, 1);
Tensor input_tensor, output_tensor, filter_tensor;
input_tensor.Resize(input->dims());
input_tensor.ShareDataWith(*input);
output_tensor.Resize(output->dims());
output_tensor.ShareDataWith(*output);
filter_tensor.Resize(filter->dims());
filter_tensor.ShareDataWith(*filter);
PADDLE_ENFORCE_EQ(
dilation[0],
1,
platform::errors::InvalidArgument(
"dilation[0] must be equal 1, but received %d.", dilation[0]));
if (channel_last) {
input_tensor.set_layout(DataLayout::kNDHWC);
output_tensor.set_layout(DataLayout::kNDHWC);
strides[1] = stride[0];
strides[2] = stride[1];
strides[3] = stride[2];
dilations[2] = dilation[1];
dilations[3] = dilation[2];
} else {
input_tensor.set_layout(DataLayout::kNCDHW);
output_tensor.set_layout(DataLayout::kNCDHW);
strides[2] = stride[0];
strides[3] = stride[1];
strides[4] = stride[2];
dilations[3] = dilation[1];
dilations[4] = dilation[2];
}
filter_tensor.set_layout(DataLayout::kNCDHW);
auto output_dim_vec = phi::vectorize<int32_t>(output_tensor.dims());
auto& dev_ctx = ctx.template device_context<NPUDeviceContext>();
NpuOpRunner runner;
runner.SetType("Conv3DBackpropInputD")
.AddInput(filter_tensor)
.AddInput(input_tensor)
.AddAttr("input_size", output_dim_vec)
.AddAttr("strides", strides)
.AddAttr("pads", padding)
.AddAttr("dilations", dilations)
.AddAttr("groups", groups)
.AddAttr("data_format", data_format)
.AddOutput(output_tensor);
runner.Run(dev_ctx.stream());
}
};
} // namespace operators
} // namespace paddle
@@ -212,3 +312,7 @@ REGISTER_OP_NPU_KERNEL(conv2d_transpose,
REGISTER_OP_NPU_KERNEL(conv2d_transpose_grad,
ops::Conv2DTransposeGradNPUKernel<float>,
ops::Conv2DTransposeGradNPUKernel<plat::float16>);
REGISTER_OP_NPU_KERNEL(conv3d_transpose,
ops::Conv3DTransposeNPUKernel<float>,
ops::Conv3DTransposeNPUKernel<plat::float16>);
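The kernel above lowers conv3d_transpose to the NPU operator Conv3DBackpropInputD, i.e. it computes the transposed convolution as the input-gradient of a regular 3-D convolution and passes the desired output shape through the "input_size" attribute. The sketch below shows the per-dimension output-size relation this relies on; it is an illustration based on the standard transposed-convolution formula, not code from this diff.

def conv_transpose_out_size(in_size, kernel, stride, pad_begin, pad_end,
                            dilation=1, output_padding=0):
    # Standard transposed-convolution size formula for one spatial dimension.
    return ((in_size - 1) * stride - pad_begin - pad_end
            + dilation * (kernel - 1) + 1 + output_padding)

# Matches the default unit test below: input [1, 1, 5, 5, 5], filter
# [1, 1, 3, 3, 3], stride 1, no padding -> output [1, 1, 7, 7, 7].
assert conv_transpose_out_size(5, 3, 1, 0, 0) == 7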
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T>
class InstanceNormNPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const auto epsilon = ctx.Attr<float>("epsilon");
const auto* x = ctx.Input<Tensor>("X");
const auto* scale = ctx.Input<Tensor>("Scale");
const auto* bias = ctx.Input<Tensor>("Bias");
auto* y = ctx.Output<Tensor>("Y");
auto* mean = ctx.Output<Tensor>("SavedMean");
auto* variance = ctx.Output<Tensor>("SavedVariance");
auto& dev_ctx = ctx.template device_context<DeviceContext>();
dev_ctx.template Alloc<T>(y);
dev_ctx.template Alloc<T>(mean);
dev_ctx.template Alloc<T>(variance);
auto x_dims = x->dims();
auto y_dims = y->dims();
PADDLE_ENFORCE(x_dims.size() <= 5 && x_dims.size() >= 3,
               platform::errors::InvalidArgument(
                   "InstanceNorm only supports input with dimension "
                   "greater than or equal to 3 and less than or equal to 5, "
                   "but received input with dimension %d.",
                   x_dims.size()));
auto tmp_x_dims = phi::vectorize<int>(x_dims);
auto tmp_y_dims = phi::vectorize<int>(y_dims);
if (x_dims.size() < 5) {
for (size_t i = x_dims.size(); i < 5; ++i) {
tmp_x_dims.insert(tmp_x_dims.begin() + 2, 1);
tmp_y_dims.insert(tmp_y_dims.begin() + 2, 1);
}
}
Tensor tmp_x, tmp_y;
tmp_x.ShareDataWith(*x);
tmp_x.Resize(phi::make_ddim(tmp_x_dims));
tmp_x.set_layout(paddle::framework::DataLayout::NCDHW);
tmp_y.ShareDataWith(*y);
tmp_y.Resize(phi::make_ddim(tmp_y_dims));
tmp_y.set_layout(paddle::framework::DataLayout::NCDHW);
NpuOpRunner runner;
runner.SetType("InstanceNorm")
.AddInput(tmp_x)
.AddInput(*scale)
.AddInput(*bias)
.AddAttr("data_format", std::string("NCDHW"))
.AddAttr("epsilon", epsilon)
.AddOutput(tmp_y)
.AddOutput(*mean)
.AddOutput(*variance);
runner.Run(dev_ctx.stream());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_NPU_KERNEL(
instance_norm,
ops::InstanceNormNPUKernel<paddle::platform::NPUDeviceContext,
plat::float16>,
ops::InstanceNormNPUKernel<paddle::platform::NPUDeviceContext, float>);
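The kernel above delegates the math to the NPU "InstanceNorm" operator. As a rough reference for what that computes (an assumption based on the usual instance-normalization definition, not taken from this diff), each (sample, channel) slice is normalized over its spatial dimensions and then scaled and shifted per channel:

import numpy as np

def instance_norm_ref(x, scale, bias, epsilon=1e-5):
    # x: (N, C, D, H, W); scale and bias: (C,)
    spatial_axes = tuple(range(2, x.ndim))
    mean = x.mean(axis=spatial_axes, keepdims=True)
    var = x.var(axis=spatial_axes, keepdims=True)
    x_hat = (x - mean) / np.sqrt(var + epsilon)
    shape = (1, -1) + (1,) * (x.ndim - 2)
    return x_hat * scale.reshape(shape) + bias.reshape(shape)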
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import sys
sys.path.append("..")
from op_test import OpTest
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
paddle.enable_static()
def conv3dtranspose_forward_naive(input_, filter_, attrs):
padding_algorithm = attrs['padding_algorithm']
if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]:
raise ValueError("Unknown Attr(padding_algorithm): '%s'. "
"It can only be 'SAME' or 'VALID'." %
str(padding_algorithm))
if attrs['data_format'] == 'NHWC':
input_ = np.transpose(input_, [0, 4, 1, 2, 3])
in_n, in_c, in_d, in_h, in_w = input_.shape
f_c, f_out_c, f_d, f_h, f_w = filter_.shape
groups = attrs['groups']
assert in_c == f_c
out_c = f_out_c * groups
sub_in_c = in_c // groups
stride, pad, dilations = attrs['strides'], attrs['paddings'], attrs[
'dilations']
def _get_padding_with_SAME(input_shape, kernel_size, kernel_stride):
padding = []
for input_size, filter_size, stride_size in zip(input_shape,
kernel_size,
kernel_stride):
out_size = int((input_size + stride_size - 1) / stride_size)
pad_sum = np.max(
((out_size - 1) * stride_size + filter_size - input_size, 0))
pad_0 = int(pad_sum / 2)
pad_1 = int(pad_sum - pad_0)
padding.append(pad_0)
padding.append(pad_1)
return padding
ksize = filter_.shape[2:5]
if padding_algorithm == "VALID":
pad = [0, 0, 0, 0, 0, 0]
elif padding_algorithm == "SAME":
dilations = [1, 1, 1]
input_data_shape = input_.shape[2:5]
pad = _get_padding_with_SAME(input_data_shape, ksize, stride)
pad_d_0, pad_d_1 = pad[0], pad[0]
pad_h_0, pad_h_1 = pad[1], pad[1]
pad_w_0, pad_w_1 = pad[2], pad[2]
if len(pad) == 6:
pad_d_0, pad_d_1 = pad[0], pad[1]
pad_h_0, pad_h_1 = pad[2], pad[3]
pad_w_0, pad_w_1 = pad[4], pad[5]
d_block_d = dilations[0] * (f_d - 1) + 1
d_block_h = dilations[1] * (f_h - 1) + 1
d_block_w = dilations[2] * (f_w - 1) + 1
out_d = (in_d - 1) * stride[0] + d_block_d
out_h = (in_h - 1) * stride[1] + d_block_h
out_w = (in_w - 1) * stride[2] + d_block_w
out = np.zeros((in_n, out_c, out_d, out_h, out_w))
for n in range(in_n):
for d in range(in_d):
for i in range(in_h):
for j in range(in_w):
for g in range(groups):
input_masked = input_[n,
g * sub_in_c:(g + 1) * sub_in_c,
d, i, j] # (c)
input_masked = np.reshape(input_masked,
(sub_in_c, 1, 1, 1))
input_masked = np.tile(input_masked, (1, f_d, f_h, f_w))
for k in range(f_out_c):
tmp_out = np.sum(input_masked *
filter_[g * sub_in_c:(g + 1) *
sub_in_c, k, :, :, :],
axis=0)
d1, d2 = d * stride[0], d * stride[0] + d_block_d
i1, i2 = i * stride[1], i * stride[1] + d_block_h
j1, j2 = j * stride[2], j * stride[2] + d_block_w
out[n, g * f_out_c + k, d1:d2:dilations[0],
i1:i2:dilations[1],
j1:j2:dilations[2]] += tmp_out
out = out[:, :, pad_d_0:out_d - pad_d_1, pad_h_0:out_h - pad_h_1,
pad_w_0:out_w - pad_w_1]
if attrs['data_format'] == 'NHWC':
out = np.transpose(out, [0, 2, 3, 4, 1])
return out
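# Quick sanity check of the reference above (illustration only, not used by
# the test classes below): with a 1x1x2x2x2 input, a 3x3x3 kernel, stride 1
# and no padding, the transposed convolution should produce a 1x1x4x4x4 output.
_demo_attrs = {
    'strides': [1, 1, 1],
    'paddings': [0, 0, 0],
    'dilations': [1, 1, 1],
    'groups': 1,
    'padding_algorithm': 'EXPLICIT',
    'data_format': 'NCHW'
}
assert conv3dtranspose_forward_naive(
    np.ones((1, 1, 2, 2, 2), dtype='float32'),
    np.ones((1, 1, 3, 3, 3), dtype='float32'),
    _demo_attrs).shape == (1, 1, 4, 4, 4)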
class TestConv3DTransposeOp(OpTest):
def set_npu(self):
self.__class__.use_npu = True
self.place = paddle.NPUPlace(0)
def setUp(self):
# init as conv transpose
self.check_no_input = False
self.check_no_filter = False
self.data_format = 'NCHW'
self.pad = [0, 0, 0]
self.padding_algorithm = "EXPLICIT"
self.init_op_type()
self.init_test_case()
self.set_npu()
input_ = np.random.random(self.input_size).astype("float32")
filter_ = np.random.random(self.filter_size).astype("float32")
self.inputs = {'Input': input_, 'Filter': filter_}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'padding_algorithm': self.padding_algorithm,
'dilations': self.dilations,
'groups': self.groups,
'data_format': self.data_format
}
output = conv3dtranspose_forward_naive(input_, filter_,
self.attrs).astype("float32")
self.outputs = {'Output': output}
def test_check_output(self):
self.check_output_with_place(self.place, atol=8e-3)
def init_test_case(self):
self.pad = [0, 0, 0]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.groups = 1
self.input_size = [1, 1, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 1, 3, 3, 3]
def init_op_type(self):
self.op_type = "conv3d_transpose"
class TestWithSymmetricPad(TestConv3DTransposeOp):
def init_test_case(self):
self.check_no_input = True
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.groups = 1
self.input_size = [1, 2, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
class TestWithAsymmetricPad(TestConv3DTransposeOp):
def init_test_case(self):
self.pad = [1, 0, 1, 0, 1, 2]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.groups = 1
self.input_size = [1, 2, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
class TestWithSAMEPad(TestConv3DTransposeOp):
def init_test_case(self):
self.stride = [1, 1, 2]
self.dilations = [1, 2, 1]
self.groups = 1
self.input_size = [1, 2, 5, 5, 6] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 4]
self.padding_algorithm = 'SAME'
class TestWithVALIDPad(TestConv3DTransposeOp):
def init_test_case(self):
self.stride = [2, 1, 1]
self.dilations = [1, 1, 1]
self.groups = 1
self.input_size = [1, 2, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 4, 3]
self.padding_algorithm = 'VALID'
class TestWithStride(TestConv3DTransposeOp):
def init_test_case(self):
self.check_no_filter = True
self.pad = [1, 1, 1]
self.stride = [2, 2, 2]
self.dilations = [1, 1, 1]
self.groups = 1
self.input_size = [1, 2, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
class TestWithDilation(TestConv3DTransposeOp):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.dilations = [1, 2, 2]
self.groups = 1
self.input_size = [1, 2, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
class Test_NHWC(TestConv3DTransposeOp):
def init_test_case(self):
self.pad = [0, 0, 0]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.groups = 1
self.input_size = [1, 5, 5, 5, 2] # NDHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 6, 3, 3, 3]
self.data_format = 'NHWC'
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import sys
sys.path.append("..")
from op_test import OpTest
import paddle
from paddle import fluid
from paddle.static import Program, program_guard
from paddle.fluid import core
from paddle.fluid.op import Operator
from paddle.fluid.dygraph import to_variable
from paddle.fluid.framework import _test_eager_guard
paddle.enable_static()
class TestInstanceNorm(unittest.TestCase):
def test_dygraph(self):
places = [fluid.NPUPlace(0)]
for p in places:
shape = [4, 10, 4, 4]
def compute_v1(x):
with fluid.dygraph.guard(p):
bn = fluid.dygraph.InstanceNorm(shape[1])
y = bn(fluid.dygraph.to_variable(x))
return y.numpy()
def compute_v2(x):
with fluid.dygraph.guard(p):
bn = paddle.nn.InstanceNorm2D(shape[1])
y = bn(fluid.dygraph.to_variable(x))
return y.numpy()
x = np.random.randn(*shape).astype("float32")
y1 = compute_v1(x)
y2 = compute_v2(x)
np.testing.assert_allclose(y1, y2, rtol=1e-03)
def test_static(self):
places = [fluid.NPUPlace(0)]
for p in places:
exe = fluid.Executor(p)
shape = [4, 10, 16, 16]
def compute_v1(x_np):
with program_guard(Program(), Program()):
ins = fluid.dygraph.InstanceNorm(shape[1])
x = fluid.data(name='x', shape=x_np.shape, dtype=x_np.dtype)
y = ins(x)
exe.run(fluid.default_startup_program())
r = exe.run(feed={'x': x_np}, fetch_list=[y])[0]
return r
def compute_v2(x_np):
with program_guard(Program(), Program()):
ins = paddle.nn.InstanceNorm2D(shape[1])
x = fluid.data(name='x', shape=x_np.shape, dtype=x_np.dtype)
y = ins(x)
exe.run(fluid.default_startup_program())
r = exe.run(feed={'x': x_np}, fetch_list=[y])[0]
return r
x = np.random.randn(*shape).astype("float32")
y1 = compute_v1(x)
y2 = compute_v2(x)
np.testing.assert_allclose(y1, y2, rtol=1e-03)
if __name__ == '__main__':
unittest.main()