未验证 提交 0e2f5bb1 编写于 作者: Q QingshuChen 提交者: GitHub

add nearest_interp_v2 on kunlun (#29725) (#29822)

* add nearest_interp_v2 on kunlun

* add nearest_interp_v2 on kunlun
Co-authored-by: NTTerror <tangzhiyi11@users.noreply.github.com>
上级 f7a598fa
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/interpolate_op.h"
#ifdef PADDLE_WITH_XPU
namespace paddle {
namespace operators {
using framework::Tensor;
using DataLayout = framework::DataLayout;
inline std::vector<int> get_new_shape_xpu(
const std::vector<const Tensor*>& list_new_shape_tensor) {
// get tensor from
std::vector<int> vec_new_shape;
for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
auto tensor = list_new_shape_tensor[i];
PADDLE_ENFORCE_EQ(
tensor->dims(), framework::make_ddim({1}),
platform::errors::InvalidArgument("shape of dim tensor should be [1]"));
framework::Tensor temp;
TensorCopySync(*tensor, platform::CPUPlace(), &temp);
vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
}
return vec_new_shape;
}
template <typename T>
inline std::vector<T> get_new_data_from_tensor_xpu(
const Tensor* new_data_tensor) {
std::vector<T> vec_new_data;
framework::Tensor cpu_starts_tensor;
TensorCopySync(*new_data_tensor, platform::CPUPlace(), &cpu_starts_tensor);
auto* new_data = cpu_starts_tensor.data<T>();
vec_new_data = std::vector<T>(new_data, new_data + new_data_tensor->numel());
return vec_new_data;
}
template <typename T>
class InterpolateV2XPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<Tensor>("X");
auto* output = ctx.Output<Tensor>("Out");
auto input_dims = input->dims();
PADDLE_ENFORCE_EQ(
input_dims.size(), 4,
platform::errors::External("XPU Interpolate kernel only support 2d"));
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
int n, c, in_d, in_h, in_w;
ExtractNCDWH(input_dims, data_layout, &n, &c, &in_d, &in_h, &in_w);
auto interp_method = ctx.Attr<std::string>("interp_method");
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
int out_h = ctx.Attr<int>("out_h");
int out_w = ctx.Attr<int>("out_w");
float scale_h = -1;
float scale_w = -1;
auto list_new_size_tensor = ctx.MultiInput<framework::Tensor>("SizeTensor");
if (list_new_size_tensor.size() > 0) {
// have size tensor
auto new_size = get_new_shape_xpu(list_new_size_tensor);
out_h = new_size[0];
out_w = new_size[1];
} else {
auto scale_tensor = ctx.Input<Tensor>("Scale");
auto scale = ctx.Attr<std::vector<float>>("scale");
if (scale_tensor != nullptr) {
auto scale_data = get_new_data_from_tensor_xpu<float>(scale_tensor);
if (scale_data.size() > 1) {
scale_h = scale_data[0];
scale_w = scale_data[1];
} else {
scale_h = scale_data[0];
scale_w = scale_data[0];
}
PADDLE_ENFORCE_EQ(
scale_w > 0 && scale_h > 0, true,
platform::errors::InvalidArgument("scale of Op(interpolate) "
"should be greater than 0."));
} else {
if (scale.size() > 1) {
scale_h = scale[0];
scale_w = scale[1];
PADDLE_ENFORCE_EQ(
scale_w > 0 && scale_h > 0, true,
platform::errors::InvalidArgument("scale of Op(interpolate) "
"should be greater than 0."));
}
}
if (scale_h > 0. && scale_w > 0.) {
out_h = static_cast<int>(in_h * scale_h);
out_w = static_cast<int>(in_w * scale_w);
}
auto out_size = ctx.Input<Tensor>("OutSize");
if (out_size != nullptr) {
auto out_size_data = get_new_data_from_tensor<int>(out_size);
out_h = out_size_data[0];
out_w = out_size_data[1];
}
}
PADDLE_ENFORCE_GT(out_h, 0, platform::errors::InvalidArgument(
"out_h in Attr(out_shape) of "
"Op(interpolate) "
"should be greater than 0."));
PADDLE_ENFORCE_GT(out_w, 0, platform::errors::InvalidArgument(
"out_w in Attr(out_shape) of "
"Op(interpolate) "
"should be greater than 0."));
framework::DDim dim_out;
if (data_layout == DataLayout::kNCHW) {
dim_out = {n, c, out_h, out_w};
} else {
dim_out = {n, out_h, out_w, c};
}
output->mutable_data<T>(dim_out, ctx.GetPlace());
if (in_h == out_h && in_w == out_w) {
framework::TensorCopy(*input, ctx.GetPlace(), output);
return;
}
bool nearest = "nearest" == interp_method;
int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
if (nearest) {
PADDLE_ENFORCE_EQ((data_layout == DataLayout::kNCHW), true,
platform::errors::InvalidArgument(
"XPU nearest is only support NCHW"));
}
int r = xpu::interpolate2d<T>(dev_ctx.x_context(), input->data<T>(),
output->data<T>(), n, c, in_h, in_w, out_h,
out_w, nearest, trans_mode,
(data_layout == DataLayout::kNCHW));
PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
platform::errors::External("XPU interpolate2d kernel "
"return wrong value[%d %s]",
r, XPUAPIErrorMsg[r]));
}
};
template <typename T>
class InterpolateV2GradXPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto output_grad_dims = output_grad->dims();
PADDLE_ENFORCE_EQ(output_grad_dims.size(), 4,
platform::errors::External(
"XPU Interpolategrad kernel only support 2d"));
auto* input = ctx.Input<Tensor>("X");
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
int n, c, in_d, in_h, in_w;
ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
auto interp_method = ctx.Attr<std::string>("interp_method");
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
int out_h = ctx.Attr<int>("out_h");
int out_w = ctx.Attr<int>("out_w");
float scale_h = -1;
float scale_w = -1;
auto list_new_size_tensor = ctx.MultiInput<framework::Tensor>("SizeTensor");
if (list_new_size_tensor.size() > 0) {
// have size tensor
auto new_size = get_new_shape_xpu(list_new_size_tensor);
out_h = new_size[0];
out_w = new_size[1];
} else {
auto scale_tensor = ctx.Input<Tensor>("Scale");
auto scale = ctx.Attr<std::vector<float>>("scale");
if (scale_tensor != nullptr) {
auto scale_data = get_new_data_from_tensor_xpu<float>(scale_tensor);
if (scale_data.size() > 1) {
scale_h = scale_data[0];
scale_w = scale_data[1];
} else {
scale_h = scale_data[0];
scale_w = scale_data[0];
}
PADDLE_ENFORCE_EQ(
scale_w > 0 && scale_h > 0, true,
platform::errors::InvalidArgument("scale of Op(interpolate) "
"should be greater than 0."));
} else {
if (scale.size() > 1) {
scale_h = scale[0];
scale_w = scale[1];
PADDLE_ENFORCE_EQ(
scale_w > 0 && scale_h > 0, true,
platform::errors::InvalidArgument("scale of Op(interpolate) "
"should be greater than 0."));
}
}
if (scale_h > 0. && scale_w > 0.) {
out_h = static_cast<int>(in_h * scale_h);
out_w = static_cast<int>(in_w * scale_w);
}
auto out_size = ctx.Input<Tensor>("OutSize");
if (out_size != nullptr) {
auto out_size_data = get_new_data_from_tensor<int>(out_size);
out_h = out_size_data[0];
out_w = out_size_data[1];
}
}
framework::DDim dim_grad;
if (data_layout == DataLayout::kNCHW) {
dim_grad = {n, c, in_h, in_w};
} else {
dim_grad = {n, in_h, in_w, c};
}
input_grad->mutable_data<T>(dim_grad, ctx.GetPlace());
auto& dev_ctx = ctx.template device_context<platform::XPUDeviceContext>();
int r = XPU_SUCCESS;
r = xpu::constant<T>(dev_ctx.x_context(), input_grad->data<T>(),
input_grad->numel(), static_cast<T>(0.0));
PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
platform::errors::External(
"XPU constant in interpolate2d_grad kernel return "
"wrong value[%d %s]",
r, XPUAPIErrorMsg[r]));
if (in_h == out_h && in_w == out_w) {
framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad);
return;
}
bool nearest = "nearest" == interp_method;
int trans_mode = (align_corners) ? (0) : ((align_mode == 0) ? (1) : (2));
if (nearest) {
trans_mode = (align_corners) ? (0) : (2);
}
r = xpu::interpolate2d_grad<T>(dev_ctx.x_context(), output_grad->data<T>(),
input_grad->data<T>(), n, c, in_h, in_w,
out_h, out_w, nearest, trans_mode,
(data_layout == DataLayout::kNCHW));
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External("XPU interpolate2d_grad kernel return "
"wrong value[%d %s]",
r, XPUAPIErrorMsg[r]));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(bilinear_interp_v2, ops::InterpolateV2XPUKernel<float>);
REGISTER_OP_XPU_KERNEL(nearest_interp_v2, ops::InterpolateV2XPUKernel<float>);
REGISTER_OP_XPU_KERNEL(bilinear_interp_v2_grad,
ops::InterpolateV2GradXPUKernel<float>);
REGISTER_OP_XPU_KERNEL(nearest_interp_v2_grad,
ops::InterpolateV2GradXPUKernel<float>);
#endif
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
import sys
sys.path.append("..")
from op_test_xpu import XPUOpTest
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
paddle.enable_static()
def nearest_neighbor_interp_np(X,
out_h,
out_w,
scale_h=0,
scale_w=0,
out_size=None,
actual_shape=None,
align_corners=True,
data_layout='NCHW'):
"""nearest neighbor interpolation implement in shape [N, C, H, W]"""
if data_layout == "NHWC":
X = np.transpose(X, (0, 3, 1, 2)) # NHWC => NCHW
if out_size is not None:
out_h = out_size[0]
out_w = out_size[1]
if actual_shape is not None:
out_h = actual_shape[0]
out_w = actual_shape[1]
n, c, in_h, in_w = X.shape
ratio_h = ratio_w = 0.0
if (out_h > 1):
if (align_corners):
ratio_h = (in_h - 1.0) / (out_h - 1.0)
else:
if scale_h > 0:
ratio_h = 1.0 / scale_h
else:
ratio_h = 1.0 * in_h / out_h
if (out_w > 1):
if (align_corners):
ratio_w = (in_w - 1.0) / (out_w - 1.0)
else:
if scale_w > 0:
ratio_w = 1.0 / scale_w
else:
ratio_w = 1.0 * in_w / out_w
out = np.zeros((n, c, out_h, out_w))
if align_corners:
for i in range(out_h):
in_i = int(ratio_h * i + 0.5)
for j in range(out_w):
in_j = int(ratio_w * j + 0.5)
out[:, :, i, j] = X[:, :, in_i, in_j]
else:
for i in range(out_h):
in_i = int(ratio_h * i)
for j in range(out_w):
in_j = int(ratio_w * j)
out[:, :, i, j] = X[:, :, in_i, in_j]
if data_layout == "NHWC":
out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC
return out.astype(X.dtype)
class TestNearestInterpOp(XPUOpTest):
def setUp(self):
self.use_xpu = True
self.out_size = None
self.actual_shape = None
self.init_test_case()
self.op_type = "nearest_interp_v2"
self.shape_by_1Dtensor = False
self.scale_by_1Dtensor = False
self.attrs = {
'interp_method': self.interp_method,
'align_corners': self.align_corners,
}
input_np = np.random.random(self.input_shape).astype("float32")
self.inputs = {'X': input_np}
if self.scale_by_1Dtensor:
self.inputs['Scale'] = np.array([self.scale]).astype("float32")
elif self.scale:
if isinstance(self.scale, float) or isinstance(self.scale, int):
if self.scale > 0:
scale_h = scale_w = float(self.scale)
if isinstance(self.scale, list) and len(self.scale) == 1:
scale_w = scale_h = self.scale[0]
elif isinstance(self.scale, list) and len(self.scale) > 1:
scale_w = self.scale[1]
scale_h = self.scale[0]
out_h = int(self.input_shape[2] * scale_h)
out_w = int(self.input_shape[3] * scale_w)
else:
out_h = self.out_h
out_w = self.out_w
if self.shape_by_1Dtensor:
self.inputs['OutSize'] = self.out_size
elif self.out_size is not None:
size_tensor = []
for index, ele in enumerate(self.out_size):
size_tensor.append(("x" + str(index), np.ones(
(1)).astype('int32') * ele))
self.inputs['SizeTensor'] = size_tensor
self.attrs['out_h'] = self.out_h
self.attrs['out_w'] = self.out_w
if self.scale:
if isinstance(self.scale, float) or isinstance(self.scale, int):
if self.scale > 0:
self.scale = [self.scale]
if isinstance(self.scale, list) and len(self.scale) == 1:
self.scale = [self.scale[0], self.scale[0]]
self.attrs['scale'] = self.scale
output_np = nearest_neighbor_interp_np(input_np, out_h, out_w, 0, 0,
self.out_size, self.actual_shape,
self.align_corners)
self.outputs = {'Out': output_np}
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X'], 'Out', in_place=True)
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [2, 5, 4, 4]
self.out_h = 3
self.out_w = 3
self.scale = 0.
self.out_size = [3, 3]
self.align_corners = True
class TestNearestNeighborInterpCase1(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [4, 1, 7, 8]
self.out_h = 1
self.out_w = 1
self.scale = 0.
self.align_corners = True
class TestNearestNeighborInterpCase2(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 3, 9, 6]
self.out_h = 12
self.out_w = 12
self.scale = 0.
self.align_corners = True
class TestNearestNeighborInterpCase3(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [1, 1, 32, 64]
self.out_h = 64
self.out_w = 32
self.scale = 0.
self.align_corners = True
class TestNearestNeighborInterpCase4(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [4, 1, 7, 8]
self.out_h = 1
self.out_w = 1
self.scale = 0.
self.out_size = np.array([2, 2]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpCase5(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 3, 9, 6]
self.out_h = 12
self.out_w = 12
self.scale = 0.
self.out_size = np.array([11, 11]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpCase6(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [1, 1, 32, 64]
self.out_h = 64
self.out_w = 32
self.scale = 0.
self.out_size = np.array([65, 129]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpSame(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [2, 3, 32, 64]
self.out_h = 32
self.out_w = 64
self.scale = 0.
self.align_corners = True
class TestNearestNeighborInterpActualShape(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 2, 32, 16]
self.out_h = 64
self.out_w = 32
self.scale = 0.
self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpDataLayout(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [2, 4, 4, 5]
self.out_h = 2
self.out_w = 2
self.scale = 0.
self.out_size = np.array([3, 8]).astype("int32")
self.align_corners = True
self.data_layout = "NHWC"
class TestNearestInterpWithoutCorners(TestNearestInterpOp):
def set_align_corners(self):
self.align_corners = False
class TestNearestNeighborInterpScale1(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 2, 7, 5]
self.out_h = 64
self.out_w = 32
self.scale = 2.
self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpScale2(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 2, 5, 7]
self.out_h = 64
self.out_w = 32
self.scale = 1.5
self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpScale3(TestNearestInterpOp):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 2, 7, 5]
self.out_h = 64
self.out_w = 32
self.scale = [2.0, 3.0]
self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True
class TestNearestInterpOp_attr_tensor(XPUOpTest):
def setUp(self):
self.use_xpu = True
self.out_size = None
self.actual_shape = None
self.init_test_case()
self.op_type = "nearest_interp_v2"
self.shape_by_1Dtensor = False
self.scale_by_1Dtensor = False
self.attrs = {
'interp_method': self.interp_method,
'align_corners': self.align_corners,
}
input_np = np.random.random(self.input_shape).astype("float32")
self.inputs = {'X': input_np}
if self.scale_by_1Dtensor:
self.inputs['Scale'] = np.array([self.scale]).astype("float32")
elif self.scale:
if isinstance(self.scale, float) or isinstance(self.scale, int):
if self.scale > 0:
scale_h = scale_w = float(self.scale)
if isinstance(self.scale, list) and len(self.scale) == 1:
scale_w = scale_h = self.scale[0]
elif isinstance(self.scale, list) and len(self.scale) > 1:
scale_w = self.scale[1]
scale_h = self.scale[0]
out_h = int(self.input_shape[2] * scale_h)
out_w = int(self.input_shape[3] * scale_w)
else:
out_h = self.out_h
out_w = self.out_w
if self.shape_by_1Dtensor:
self.inputs['OutSize'] = self.out_size
elif self.out_size is not None:
size_tensor = []
for index, ele in enumerate(self.out_size):
size_tensor.append(("x" + str(index), np.ones(
(1)).astype('int32') * ele))
self.inputs['SizeTensor'] = size_tensor
self.attrs['out_h'] = self.out_h
self.attrs['out_w'] = self.out_w
if self.scale:
if isinstance(self.scale, float) or isinstance(self.scale, int):
if self.scale > 0:
self.scale = [self.scale]
if isinstance(self.scale, list) and len(self.scale) == 1:
self.scale = [self.scale[0], self.scale[0]]
self.attrs['scale'] = self.scale
output_np = nearest_neighbor_interp_np(input_np, out_h, out_w, 0, 0,
self.out_size, self.actual_shape,
self.align_corners)
self.outputs = {'Out': output_np}
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X'], 'Out', in_place=True)
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [2, 5, 4, 4]
self.out_h = 3
self.out_w = 3
self.scale = 0.
self.out_size = [3, 3]
self.align_corners = True
# out_size is a tensor list
class TestNearestInterp_attr_tensor_Case1(TestNearestInterpOp_attr_tensor):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 3, 9, 6]
self.out_h = 12
self.out_w = 12
self.scale = 0.
self.out_size = [8, 12]
self.align_corners = True
# out_size is a 1-D tensor
class TestNearestInterp_attr_tensor_Case2(TestNearestInterpOp_attr_tensor):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 2, 32, 16]
self.out_h = 64
self.out_w = 32
self.scale = 0.
self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True
self.shape_by_1Dtensor = True
# scale is a 1-D tensor
class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor):
def init_test_case(self):
self.interp_method = 'nearest'
self.input_shape = [3, 2, 32, 16]
self.out_h = 64
self.out_w = 32
self.scale = 2.0
self.out_size = None
self.align_corners = True
self.scale_by_1Dtensor = True
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册