Unverified commit 5d19f8d8, authored by jakpiase, committed by GitHub

Added bilinear and nearest interp v2 oneDNN FP32 kernels (#32312)

Parent commit 4898c38d
@@ -43,8 +43,9 @@ void InterpolateMKLDNNPass::ApplyImpl(ir::Graph* graph) const {
   int found_count = 0;
   const std::vector<std::string> interpolate_op_types = {
-      "bilinear_interp", "nearest_interp", "trilinear_interp", "bicubic_interp",
-      "linear_interp"};
+      "bilinear_interp", "nearest_interp", "trilinear_interp",
+      "bicubic_interp", "linear_interp", "bilinear_interp_v2",
+      "nearest_interp_v2"};
 
   for (const Node* node : graph->Nodes()) {
     if (node->IsOp() &&
......
@@ -77,7 +77,8 @@ bool PlacementPassBase::IsDefaultOpTypes(const std::string& op_type) const {
   // the corresponding pass.
   const std::vector<std::string> not_default_op_types = {
       "bilinear_interp", "nearest_interp", "trilinear_interp",
-      "bicubic_interp", "linear_interp"};
+      "bicubic_interp", "linear_interp", "bilinear_interp_v2",
+      "linear_interp_v2"};
   bool is_interpolate_op =
       std::find(not_default_op_types.begin(), not_default_op_types.end(),
                 op_type) != not_default_op_types.end();
......
@@ -14,6 +14,9 @@
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
+#ifdef PADDLE_WITH_MKLDNN
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#endif
 
 namespace paddle {
 namespace operators {
@@ -359,13 +362,41 @@ class InterpolateV2Op : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    return framework::OpKernelType(
-        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
+    framework::DataLayout layout = framework::DataLayout::kAnyLayout;
+    framework::LibraryType library = framework::LibraryType::kPlain;
+    auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
+
+#ifdef PADDLE_WITH_MKLDNN
+    auto interp_method = ctx.Attr<std::string>("interp_method");
+    // TODO(danqing): support other interp_method
+    if (this->CanMKLDNNBeUsed(ctx, data_type) &&
+        (interp_method == "nearest" || interp_method == "bilinear")) {
+      layout = framework::DataLayout::kMKLDNN;
+      library = framework::LibraryType::kMKLDNN;
+    }
+#endif
+
+    return framework::OpKernelType(data_type, ctx.GetPlace(), layout, library);
   }
 
   framework::OpKernelType GetKernelTypeForVar(
       const std::string& var_name, const Tensor& tensor,
       const framework::OpKernelType& expected_kernel_type) const override {
+#ifdef PADDLE_WITH_MKLDNN
+    if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) &&
+        (tensor.layout() != framework::DataLayout::kMKLDNN)) {
+      auto attrs = Attrs();
+      auto ar = paddle::framework::AttrReader(attrs);
+      const std::string data_format = ar.Get<std::string>("data_layout");
+      auto dl = framework::StringToDataLayout(data_format);
+      // Some models may have intentionally set "AnyLayout" for pool
+      // op. Treat this as NCHW (default data_format value)
+      if (dl != framework::DataLayout::kAnyLayout) {
+        return framework::OpKernelType(expected_kernel_type.data_type_,
+                                       tensor.place(), dl);
+      }
+    }
+#endif
     if (var_name == "SizeTensor" || var_name == "Scale") {
       return expected_kernel_type;
     }
@@ -436,6 +467,9 @@ class InterpolateV2OpMaker : public framework::OpProtoAndCheckerMaker {
           "can be \'0\' for src_idx = scale*(dst_indx+0.5)-0.5 , "
           "can be \'1\' for src_idx = scale*dst_index .")
         .SetDefault(1);
+    AddAttr<bool>("use_mkldnn",
+                  "(bool, default false) Only used in mkldnn kernel")
+        .SetDefault(false);
     AddComment(R"DOC(
 This operator samples input X to given output shape by using specified
 interpolation method, the interpolation methods can be \"nearest\"
......
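Note on the dispatch above: in PADDLE_WITH_MKLDNN builds the v2 interpolate ops pick the oneDNN kernel only when interp_method is "nearest" or "bilinear" and CanMKLDNNBeUsed() is satisfied (which checks, among other things, the new use_mkldnn attribute). A minimal sketch of an attribute set that would take this path, mirroring the attrs dict built in the new unit tests further below (the concrete values here are illustrative, not taken from this commit):

    # Sketch only: attributes that satisfy the oneDNN dispatch above.
    attrs = {
        'interp_method': 'nearest',  # or 'bilinear'; other methods stay on the plain CPU kernel
        'use_mkldnn': True,          # attribute added to InterpolateV2OpMaker in this commit
        'data_layout': 'NCHW',       # consulted by GetKernelTypeForVar for layout reorders
        'out_h': 12,
        'out_w': 12,
        'scale': [2.0, 2.0],         # v2 ops carry a vector<float> scale
    }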
@@ -33,7 +33,7 @@ class InterpolateMKLDNNHandler
     : public platform::MKLDNNHandlerT<T, dnnl::resampling_forward> {
  public:
   InterpolateMKLDNNHandler(const dnnl::algorithm algo,
-                           const paddle::platform::MKLDNNDeviceContext& dev_ctx,
+                           const platform::MKLDNNDeviceContext& dev_ctx,
                            const dnnl::engine engine, platform::Place cpu_place,
                            const Tensor* x, Tensor* z,
                            const std::string& uniq_name)
@@ -94,19 +94,32 @@ class InterpolateMKLDNNKernel : public framework::OpKernel<T> {
         out_dims = out_size_data;
       }
     } else {
-      float scale;
+      std::vector<float> scale;
+      scale.reserve(3);
       auto scale_tensor = ctx.Input<Tensor>("Scale");
       if (scale_tensor != nullptr) {
         auto scale_data = get_new_data_from_tensor<float>(scale_tensor);
-        scale = scale_data[0];
+        scale.resize(3, scale_data[0]);
+        std::copy(scale_data.begin(), scale_data.end(), scale.begin());
       } else {
-        scale = ctx.Attr<float>("scale");
+        std::string op_type = ctx.Type();
+        if (op_type.find("v2") == std::string::npos) {  // v1
+          scale.push_back(ctx.Attr<float>("scale"));
+          scale.push_back(scale[0]);
+          scale.push_back(scale[0]);
+        } else {  // v2
+          std::vector<float> scale_attr = ctx.Attr<std::vector<float>>("scale");
+          scale.resize(3, scale_attr[0]);
+          std::copy(scale_attr.begin(), scale_attr.end(), scale.begin());
+        }
       }
-      if (scale > 0) {
+      if (scale[0] > 0.0f && scale[1] > 0.0f && scale[2] > 0.0f) {
+        int j = 0;
         std::vector<int64_t> in_dhw_vec = framework::vectorize(in_dhw_dims);
         std::transform(
             in_dhw_vec.begin(), in_dhw_vec.end(), out_dims.begin(),
-            [&](int64_t i) -> int { return static_cast<int>(i * scale); });
+            [&](int64_t i) -> int { return static_cast<int>(i * scale[j++]); });
       }
     }
@@ -172,3 +185,8 @@ REGISTER_OP_KERNEL(nearest_interp, MKLDNN, ::paddle::platform::CPUPlace,
                    ops::InterpolateMKLDNNKernel<float>);
 REGISTER_OP_KERNEL(bilinear_interp, MKLDNN, ::paddle::platform::CPUPlace,
                    ops::InterpolateMKLDNNKernel<float>);
+
+REGISTER_OP_KERNEL(nearest_interp_v2, MKLDNN, ::paddle::platform::CPUPlace,
+                   ops::InterpolateMKLDNNKernel<float>);
+REGISTER_OP_KERNEL(bilinear_interp_v2, MKLDNN, ::paddle::platform::CPUPlace,
+                   ops::InterpolateMKLDNNKernel<float>);
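The main behavioural change in this kernel is the scale handling: the v1 ops carry a single float "scale" attribute, while the v2 ops carry a vector<float>, and both forms are now broadcast to one factor per spatial dimension before the output shape is computed (and only applied when every factor is positive, as in the guard above). A rough Python equivalent of that normalization, written as a hypothetical helper for illustration rather than code from this commit:

    def normalize_scale(scale, ndims=3):
        # Broadcast a scalar (v1 "scale" attr) or a possibly shorter list
        # (v2 attr / Scale tensor) to `ndims` per-dimension factors,
        # padding missing entries with the first value, as the C++ above does.
        if isinstance(scale, (int, float)):
            return [float(scale)] * ndims
        factors = [float(scale[0])] * ndims
        for i, s in enumerate(scale[:ndims]):
            factors[i] = float(s)
        return factors

    # normalize_scale(2.0)        -> [2.0, 2.0, 2.0]   (v1 behaviour)
    # normalize_scale([2.0, 3.0]) -> [2.0, 3.0, 2.0]   (v2 behaviour)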
@@ -198,4 +198,6 @@ class TestBilinearNeighborInterpSame(TestBilinearInterpMKLDNNOp):
 
 if __name__ == "__main__":
+    from paddle import enable_static
+    enable_static()
     unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import math
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid.tests.unittests.op_test import OpTest
from paddle.fluid.tests.unittests.op_test import skip_check_grad_ci
def bilinear_interp_mkldnn_np(input,
out_h,
out_w,
out_size=None,
actual_shape=None,
data_layout='NCHW'):
"""bilinear interpolation implement in shape [N, C, H, W]"""
if data_layout == "NHWC":
input = np.transpose(input, (0, 3, 1, 2)) # NHWC => NCHW
if out_size is not None:
out_h = out_size[0]
out_w = out_size[1]
if actual_shape is not None:
out_h = actual_shape[0]
out_w = actual_shape[1]
batch_size, channel, in_h, in_w = input.shape
out = np.zeros((batch_size, channel, out_h, out_w))
for oh in range(out_h):
h0 = int(math.floor((oh + 0.5) * in_h / out_h - 0.5))
h1 = int(math.ceil((oh + 0.5) * in_h / out_h - 0.5))
h0 = max(h0, 0)
h1 = min(h1, in_h - 1)
Wh = (oh + 0.5) * in_h / out_h - 0.5 - h0
for ow in range(out_w):
w0 = int(math.floor((ow + 0.5) * in_w / out_w - 0.5))
w1 = int(math.ceil((ow + 0.5) * in_w / out_w - 0.5))
w0 = max(w0, 0)
w1 = min(w1, in_w - 1)
Ww = (ow + 0.5) * in_w / out_w - 0.5 - w0
input_h0_w0 = input[:, :, h0, w0]
input_h1_w0 = input[:, :, h1, w0]
input_h0_w1 = input[:, :, h0, w1]
input_h1_w1 = input[:, :, h1, w1]
out[:, :, oh, ow] = input_h0_w0 * (1 - Wh) * (
1 - Ww) + input_h1_w0 * Wh * (1 - Ww) + input_h0_w1 * (
1 - Wh) * Ww + input_h1_w1 * Wh * Ww
if data_layout == "NHWC":
out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC
return out.astype(input.dtype)
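# Illustrative usage of the reference implementation above (not part of the
# original test file), assuming a 1x1x2x2 float32 input upsampled to 4x4:
#   x = np.arange(4, dtype="float32").reshape(1, 1, 2, 2)
#   y = bilinear_interp_mkldnn_np(x, 4, 4)   # y.shape == (1, 1, 4, 4)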
@skip_check_grad_ci(reason="Have not implemented the interpolate grad kernel.")
class TestBilinearInterpMKLDNNOp(OpTest):
def init_test_case(self):
pass
def setUp(self):
self.op_type = "bilinear_interp_v2"
self.interp_method = 'bilinear'
self._cpu_only = True
self.use_mkldnn = True
self.input_shape = [1, 1, 2, 2]
self.data_layout = 'NCHW'
# priority: actual_shape > out_size > scale > out_h & out_w
self.out_h = 1
self.out_w = 1
self.scale = 2.0
self.out_size = None
self.actual_shape = None
self.init_test_case()
input_np = np.random.random(self.input_shape).astype("float32")
if self.data_layout == "NCHW":
in_h = self.input_shape[2]
in_w = self.input_shape[3]
else:
in_h = self.input_shape[1]
in_w = self.input_shape[2]
scale_h = 0
scale_w = 0
if self.scale:
if isinstance(self.scale, float) or isinstance(self.scale, int):
scale_h = float(self.scale)
scale_w = float(self.scale)
if isinstance(self.scale, list) and len(self.scale) == 1:
scale_w = self.scale[0]
scale_h = self.scale[0]
elif isinstance(self.scale, list) and len(self.scale) > 1:
scale_w = self.scale[1]
scale_h = self.scale[0]
if scale_h > 0 and scale_w > 0:
out_h = int(in_h * scale_h)
out_w = int(in_w * scale_w)
else:
out_h = self.out_h
out_w = self.out_w
output_np = bilinear_interp_mkldnn_np(input_np, out_h, out_w,
self.out_size, self.actual_shape,
self.data_layout)
if isinstance(self.scale, float):
self.scale = [self.scale, self.scale]
self.inputs = {'X': input_np}
if self.out_size is not None:
self.inputs['OutSize'] = self.out_size
if self.actual_shape is not None:
self.inputs['OutSize'] = self.actual_shape
self.attrs = {
'interp_method': self.interp_method,
'out_h': self.out_h,
'out_w': self.out_w,
'scale': self.scale,
'data_layout': self.data_layout,
'use_mkldnn': self.use_mkldnn
}
self.outputs = {'Out': output_np}
def test_check_output(self):
self.check_output(check_dygraph=False)
class TestBilinearInterpOpMKLDNNNHWC(TestBilinearInterpMKLDNNOp):
def init_test_case(self):
self.input_shape = [3, 2, 32, 16]
self.out_h = 27
self.out_w = 49
self.scale = [2.0, 3.0]
self.data_layout = 'NHWC'
class TestBilinearNeighborInterpMKLDNNCase2(TestBilinearInterpMKLDNNOp):
def init_test_case(self):
self.input_shape = [3, 3, 9, 6]
self.out_h = 12
self.out_w = 12
class TestBilinearNeighborInterpCase3(TestBilinearInterpMKLDNNOp):
def init_test_case(self):
self.input_shape = [1, 1, 32, 64]
self.out_h = 64
self.out_w = 128
self.scale = [0.1, 0.05]
class TestBilinearNeighborInterpCase4(TestBilinearInterpMKLDNNOp):
def init_test_case(self):
self.input_shape = [1, 1, 32, 64]
self.out_h = 64
self.out_w = 32
self.scale = [13.0, 15.0]
self.out_size = np.array([65, 129]).astype("int32")
class TestBilinearNeighborInterpCase5(TestBilinearInterpMKLDNNOp):
def init_test_case(self):
self.input_shape = [1, 1, 9, 6]
self.out_h = 12
self.out_w = 12
self.out_size = np.array([13, 13]).astype("int32")
class TestBilinearNeighborInterpCase6(TestBilinearInterpMKLDNNOp):
def init_test_case(self):
self.input_shape = [1, 1, 32, 64]
self.out_h = 64
self.out_w = 32
self.scale = 1.0
self.out_size = np.array([65, 129]).astype("int32")
class TestBilinearNeighborInterpSame(TestBilinearInterpMKLDNNOp):
def init_test_case(self):
self.input_shape = [2, 3, 32, 64]
self.out_h = 32
self.out_w = 64
self.scale = 2.0
self.out_size = np.array([65, 129]).astype("int32")
if __name__ == "__main__":
from paddle import enable_static
enable_static()
unittest.main()
@@ -163,4 +163,6 @@ class TestNearestNeighborInterpSame(TestNearestInterpMKLDNNOp):
 
 if __name__ == "__main__":
+    from paddle import enable_static
+    enable_static()
     unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid.tests.unittests.op_test import OpTest
from paddle.fluid.tests.unittests.op_test import skip_check_grad_ci
def nearest_neighbor_interp_mkldnn_np(X,
out_h,
out_w,
out_size=None,
actual_shape=None,
data_layout='NCHW'):
"""nearest neighbor interpolation implement in shape [N, C, H, W]"""
if data_layout == "NHWC":
X = np.transpose(X, (0, 3, 1, 2)) # NHWC => NCHW
if out_size is not None:
out_h = out_size[0]
out_w = out_size[1]
if actual_shape is not None:
out_h = actual_shape[0]
out_w = actual_shape[1]
n, c, in_h, in_w = X.shape
fh = fw = 0.0
if (out_h > 1):
fh = out_h * 1.0 / in_h
if (out_w > 1):
fw = out_w * 1.0 / in_w
out = np.zeros((n, c, out_h, out_w))
for oh in range(out_h):
ih = int(round((oh + 0.5) / fh - 0.5))
for ow in range(out_w):
iw = int(round((ow + 0.5) / fw - 0.5))
out[:, :, oh, ow] = X[:, :, ih, iw]
if data_layout == "NHWC":
out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC
return out.astype(X.dtype)
@skip_check_grad_ci(reason="Have not implemented the interpolate grad kernel.")
class TestNearestInterpV2MKLDNNOp(OpTest):
def init_test_case(self):
pass
def setUp(self):
self.op_type = "nearest_interp_v2"
self.interp_method = 'nearest'
self._cpu_only = True
self.use_mkldnn = True
self.input_shape = [1, 1, 2, 2]
self.data_layout = 'NCHW'
# priority: actual_shape > out_size > scale > out_h & out_w
self.out_h = 1
self.out_w = 1
self.scale = [2.0, 3.0]
self.out_size = None
self.actual_shape = None
self.init_test_case()
input_np = np.random.random(self.input_shape).astype("float32")
if self.data_layout == "NCHW":
in_h = self.input_shape[2]
in_w = self.input_shape[3]
else:
in_h = self.input_shape[1]
in_w = self.input_shape[2]
scale_h = 0
scale_w = 0
if self.scale:
if isinstance(self.scale, float) or isinstance(self.scale, int):
scale_h = float(self.scale)
scale_w = float(self.scale)
if isinstance(self.scale, list) and len(self.scale) == 1:
scale_w = self.scale[0]
scale_h = self.scale[0]
elif isinstance(self.scale, list) and len(self.scale) > 1:
scale_w = self.scale[1]
scale_h = self.scale[0]
if scale_h > 0 and scale_w > 0:
out_h = int(in_h * scale_h)
out_w = int(in_w * scale_w)
else:
out_h = self.out_h
out_w = self.out_w
output_np = nearest_neighbor_interp_mkldnn_np(
input_np, out_h, out_w, self.out_size, self.actual_shape,
self.data_layout)
if isinstance(self.scale, float):
self.scale = [self.scale]
self.inputs = {'X': input_np}
if self.out_size is not None:
self.inputs['OutSize'] = self.out_size
if self.actual_shape is not None:
self.inputs['OutSize'] = self.actual_shape
self.attrs = {
'interp_method': self.interp_method,
'out_h': self.out_h,
'out_w': self.out_w,
'scale': self.scale,
'data_layout': self.data_layout,
'use_mkldnn': self.use_mkldnn
}
self.outputs = {'Out': output_np}
def test_check_output(self):
self.check_output(check_dygraph=False)
class TestNearestInterpOpV2MKLDNNNHWC(TestNearestInterpV2MKLDNNOp):
def init_test_case(self):
self.input_shape = [3, 2, 32, 16]
self.out_h = 27
self.out_w = 49
self.scale = [2.0, 3.0]
self.data_layout = 'NHWC'
class TestNearestNeighborInterpV2MKLDNNCase2(TestNearestInterpV2MKLDNNOp):
def init_test_case(self):
self.input_shape = [3, 3, 9, 6]
self.out_h = 12
self.out_w = 12
class TestNearestNeighborInterpV2MKLDNNCase3(TestNearestInterpV2MKLDNNOp):
def init_test_case(self):
self.input_shape = [1, 1, 32, 64]
self.out_h = 64
self.out_w = 128
self.scale = [0.1, 0.05]
class TestNearestNeighborInterpV2MKLDNNCase4(TestNearestInterpV2MKLDNNOp):
def init_test_case(self):
self.input_shape = [1, 1, 32, 64]
self.out_h = 64
self.out_w = 32
self.scale = [13.0, 15.0]
self.out_size = np.array([65, 129]).astype("int32")
class TestNearestNeighborInterpV2MKLDNNSame(TestNearestInterpV2MKLDNNOp):
def init_test_case(self):
self.input_shape = [2, 3, 32, 64]
self.out_h = 32
self.out_w = 64
self.out_size = np.array([65, 129]).astype("int32")
if __name__ == "__main__":
from paddle import enable_static
enable_static()
unittest.main()
@@ -603,7 +603,9 @@ STATIC_MODE_TESTING_LIST = [
     'test_fc_mkldnn_op',
     'test_fc_bf16_mkldnn_op',
     'test_nearest_interp_mkldnn_op',
+    'test_nearest_interp_v2_mkldnn_op',
     'test_bilinear_interp_mkldnn_op',
+    'test_bilinear_interp_v2_mkldnn_op',
     'test_fusion_gru_int8_mkldnn_op',
     'test_fusion_gru_bf16_mkldnn_op',
     'test_fusion_gru_mkldnn_op',
......