Unverified · Commit 1f82c0cd, authored by ruri and committed by GitHub

[Api2.0] add pixel shuffle (#26071)

Parent 1ed74aae
...@@ -28,25 +28,44 @@ class PixelShuffleOp : public framework::OperatorWithKernel {
         "Output(Out) of PixelShuffleOp should not be null."));
     auto input_dims = ctx->GetInputDim("X");
-    PADDLE_ENFORCE_EQ(
-        input_dims.size(), 4,
-        platform::errors::InvalidArgument(
-            "Input should be a 4-D tensor of format [N, C, H, W], but got %u.",
-            input_dims.size()));
+    PADDLE_ENFORCE_EQ(input_dims.size(), 4,
+                      platform::errors::InvalidArgument(
+                          "Input should be a 4-D tensor of format [N, C, H, W] "
+                          "or [N, H, W, C], but got %u.",
+                          input_dims.size()));
     auto upscale_factor = ctx->Attrs().Get<int>("upscale_factor");
-    PADDLE_ENFORCE_EQ(input_dims[1] % (upscale_factor * upscale_factor), 0,
-                      platform::errors::InvalidArgument(
-                          "The square of upscale_factor[%u] should divide the "
-                          "number of channel[%u]",
-                          input_dims[1], upscale_factor * upscale_factor));
+    const std::string data_format =
+        ctx->Attrs().Get<std::string>("data_format");
+    const bool channel_last = (data_format == "NHWC");
+
+    if (!channel_last) {
+      PADDLE_ENFORCE_EQ(
+          input_dims[1] % (upscale_factor * upscale_factor), 0,
+          platform::errors::InvalidArgument(
+              "The square of upscale_factor[%u] should divide the "
+              "number of channel[%u]",
+              input_dims[1], upscale_factor * upscale_factor));
+    } else {
+      PADDLE_ENFORCE_EQ(
+          input_dims[3] % (upscale_factor * upscale_factor), 0,
+          platform::errors::InvalidArgument(
+              "The square of upscale_factor[%u] should divide the "
+              "number of channel[%u]",
+              input_dims[3], upscale_factor * upscale_factor));
+    }
     auto output_dims = input_dims;
     output_dims[0] = input_dims[0];
-    output_dims[1] = input_dims[1] / (upscale_factor * upscale_factor);
-    output_dims[2] = input_dims[2] * upscale_factor;
-    output_dims[3] = input_dims[3] * upscale_factor;
+    if (!channel_last) {
+      output_dims[1] = input_dims[1] / (upscale_factor * upscale_factor);
+      output_dims[2] = input_dims[2] * upscale_factor;
+      output_dims[3] = input_dims[3] * upscale_factor;
+    } else {
+      output_dims[1] = input_dims[1] * upscale_factor;
+      output_dims[2] = input_dims[2] * upscale_factor;
+      output_dims[3] = input_dims[3] / (upscale_factor * upscale_factor);
+    }
     ctx->SetOutputDim("Out", output_dims);
   }
 };
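For reference, the output-shape rule this rewritten InferShape encodes can be sketched in a few lines of plain Python. This is an illustrative aid only; the helper name is invented and not part of the commit.

def pixel_shuffle_out_shape(dims, factor, data_format="NCHW"):
    # dims is [N, C, H, W] for NCHW, or [N, H, W, C] for NHWC
    n, d1, d2, d3 = dims
    if data_format == "NCHW":
        assert d1 % (factor * factor) == 0, "factor^2 must divide C"
        return [n, d1 // (factor * factor), d2 * factor, d3 * factor]
    assert d3 % (factor * factor) == 0, "factor^2 must divide C"
    return [n, d1 * factor, d2 * factor, d3 // (factor * factor)]

print(pixel_shuffle_out_shape([2, 9, 4, 4], 3))          # [2, 1, 12, 12]
print(pixel_shuffle_out_shape([2, 4, 4, 9], 3, "NHWC"))  # [2, 12, 12, 1]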
...@@ -54,14 +73,14 @@ class PixelShuffleOp : public framework::OperatorWithKernel {
 class PixelShuffleOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
-    AddInput(
-        "X",
-        "(Tensor, default Tensor<float>), "
-        "the input feature data of PixelShuffleOp, the layout is [N C H W].");
-    AddOutput(
-        "Out",
-        "(Tensor, default Tensor<float>), the output of "
-        "PixelShuffleOp. The layout is [N,C/factor^2,H*factor,W*factor].");
+    AddInput("X",
+             "(Tensor, default Tensor<float>), "
+             "the input feature data of PixelShuffleOp, the layout is [N, C, "
+             "H, W] or [N, H, W, C].");
+    AddOutput("Out",
+              "(Tensor, default Tensor<float>), the output of "
+              "PixelShuffleOp. The layout is [N, C/factor^2, H*factor, "
+              "W*factor] or [N, H*factor, W*factor, C/factor^2].");
     AddAttr<int>("upscale_factor",
                  "the factor to increase spatial resolution by.")
         .SetDefault(1)
...@@ -70,6 +89,11 @@ class PixelShuffleOpMaker : public framework::OpProtoAndCheckerMaker {
           platform::errors::InvalidArgument(
               "upscale_factor should be larger than 0."));
         });
+    AddAttr<std::string>(
+        "data_format",
+        "An optional string from: \"NHWC\", \"NCHW\". "
+        "Defaults to \"NCHW\". Specify the data format of the input data.")
+        .SetDefault("NCHW");
     AddComment(R"DOC(
         Pixel Shuffle operator
...@@ -114,19 +138,30 @@ class PixelShuffleGradOp : public framework::OperatorWithKernel {
         platform::errors::NotFound("Output(X@Grad) should not be null"));
     auto do_dims = ctx->GetInputDim(framework::GradVarName("Out"));
-    PADDLE_ENFORCE_EQ(
-        do_dims.size(), 4,
-        platform::errors::InvalidArgument(
-            "Input should be a 4-D tensor of format [N, C, H, W], but got %u.",
-            do_dims.size()));
+    PADDLE_ENFORCE_EQ(do_dims.size(), 4,
+                      platform::errors::InvalidArgument(
+                          "Input should be a 4-D tensor of format [N, C, H, W] "
+                          "or [N, H, W, C], but got %u.",
+                          do_dims.size()));
     auto upscale_factor = ctx->Attrs().Get<int>("upscale_factor");
+    const std::string data_format =
+        ctx->Attrs().Get<std::string>("data_format");
+    const bool channel_last = (data_format == "NHWC");
     auto dx_dims = do_dims;
     dx_dims[0] = do_dims[0];
-    dx_dims[1] = do_dims[1] * (upscale_factor * upscale_factor);
-    dx_dims[2] = do_dims[2] / upscale_factor;
-    dx_dims[3] = do_dims[3] / upscale_factor;
+    if (!channel_last) {
+      dx_dims[1] = do_dims[1] * (upscale_factor * upscale_factor);
+      dx_dims[2] = do_dims[2] / upscale_factor;
+      dx_dims[3] = do_dims[3] / upscale_factor;
+    } else {
+      dx_dims[1] = do_dims[1] / upscale_factor;
+      dx_dims[2] = do_dims[2] / upscale_factor;
+      dx_dims[3] = do_dims[3] * (upscale_factor * upscale_factor);
+    }
     ctx->SetOutputDim(framework::GradVarName("X"), dx_dims);
   }
 };
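The gradient shapes are simply the forward rule inverted. A hedged cross-check in Python (hypothetical helper name, not from the commit):

def pixel_shuffle_grad_shape(do_dims, factor, data_format="NCHW"):
    # maps the Out@Grad shape back to the X@Grad shape
    n, d1, d2, d3 = do_dims
    if data_format == "NCHW":
        return [n, d1 * factor * factor, d2 // factor, d3 // factor]
    return [n, d1 // factor, d2 // factor, d3 * factor * factor]

print(pixel_shuffle_grad_shape([2, 1, 12, 12], 3))          # [2, 9, 4, 4]
print(pixel_shuffle_grad_shape([2, 12, 12, 1], 3, "NHWC"))  # [2, 4, 4, 9]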
......
...@@ -11,6 +11,7 @@ limitations under the License. */
 #pragma once
 #include <algorithm>
+#include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"
...@@ -24,23 +25,33 @@ class PixelShuffleOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* in = ctx.Input<framework::Tensor>("X");
     auto* out = ctx.Output<framework::Tensor>("Out");
     out->mutable_data<T>(ctx.GetPlace());
     int factor = ctx.Attr<int>("upscale_factor");
+    std::string data_format = ctx.Attr<std::string>("data_format");
+    bool channel_last = (data_format == "NHWC");
     auto in_dims = in->dims();
     auto o_dims = out->dims();
     framework::Tensor t;
     t.ShareDataWith(*in);
-    t.Resize({in_dims[0], o_dims[1], factor, factor, in_dims[2], in_dims[3]});
+    if (!channel_last) {
+      t.Resize({in_dims[0], o_dims[1], factor, factor, in_dims[2], in_dims[3]});
+    } else {
+      t.Resize({in_dims[0], in_dims[1], in_dims[2], o_dims[3], factor, factor});
+    }
     std::vector<int> axis = {0, 1, 4, 2, 5, 3};
     framework::Tensor o;
     o.ShareDataWith(*out);
-    o.Resize({in_dims[0], o_dims[1], in_dims[2], factor, in_dims[3], factor});
+    if (!channel_last) {
+      o.Resize({in_dims[0], o_dims[1], in_dims[2], factor, in_dims[3], factor});
+    } else {
+      o.Resize({in_dims[0], in_dims[1], factor, in_dims[2], factor, o_dims[3]});
+    }
     math::Transpose<DeviceContext, T, 6> trans;
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     trans(dev_ctx, t, &o, axis);
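The kernel realizes pixel shuffle as a single 6-D reshape plus transpose, and notably the same permutation {0, 1, 4, 2, 5, 3} serves both layouts. A hedged NumPy equivalent of the trick (my sketch, not the committed kernel):

import numpy as np

def pixel_shuffle_ref(x, r, channel_last=False):
    if not channel_last:                       # NCHW
        n, c, h, w = x.shape
        t = x.reshape(n, c // (r * r), r, r, h, w)
        # (N, C', r, r, H, W) -> (N, C', H, r, W, r) -> (N, C', H*r, W*r)
        return t.transpose(0, 1, 4, 2, 5, 3).reshape(n, c // (r * r),
                                                     h * r, w * r)
    n, h, w, c = x.shape                       # NHWC
    t = x.reshape(n, h, w, c // (r * r), r, r)
    # (N, H, W, C', r, r) -> (N, H, r, W, r, C') -> (N, H*r, W*r, C')
    return t.transpose(0, 1, 4, 2, 5, 3).reshape(n, h * r, w * r,
                                                 c // (r * r))

print(pixel_shuffle_ref(np.random.rand(2, 9, 4, 4), 3).shape)  # (2, 1, 12, 12)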
...@@ -58,19 +69,32 @@ class PixelShuffleGradOpKernel : public framework::OpKernel<T> {
     int factor = ctx.Attr<int>("upscale_factor");
+    std::string data_format = ctx.Attr<std::string>("data_format");
+    bool channel_last = (data_format == "NHWC");
     auto do_dims = dout->dims();
     auto dx_dims = dx->dims();
     framework::Tensor t;
     t.ShareDataWith(*dout);
-    t.Resize({do_dims[0], do_dims[1], dx_dims[2], factor, dx_dims[3], factor});
+    if (!channel_last) {
+      t.Resize(
+          {do_dims[0], do_dims[1], dx_dims[2], factor, dx_dims[3], factor});
+    } else {
+      t.Resize(
+          {do_dims[0], dx_dims[1], factor, dx_dims[2], factor, do_dims[3]});
+    }
     std::vector<int> axis = {0, 1, 3, 5, 2, 4};
     framework::Tensor o;
     o.ShareDataWith(*dx);
-    o.Resize({do_dims[0], do_dims[1], factor, factor, dx_dims[2], dx_dims[3]});
+    if (!channel_last) {
+      o.Resize(
+          {do_dims[0], do_dims[1], factor, factor, dx_dims[2], dx_dims[3]});
+    } else {
+      o.Resize(
+          {do_dims[0], dx_dims[1], dx_dims[2], do_dims[3], factor, factor});
+    }
     math::Transpose<DeviceContext, T, 6> trans;
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     trans(dev_ctx, t, &o, axis);
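A quick, hedged sanity check that the backward permutation is the inverse of the forward one, so the gradient kernel just undoes the forward transpose on the incoming gradient:

import numpy as np

forward_axis = [0, 1, 4, 2, 5, 3]    # used by PixelShuffleOpKernel
backward_axis = list(np.argsort(forward_axis))
print(backward_axis)                 # [0, 1, 3, 5, 2, 4], as in the grad kernel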
......
...@@ -16,16 +16,17 @@ from __future__ import print_function
 import unittest
 import numpy as np
 from op_test import OpTest
+import paddle
+import paddle.nn.functional as F
+import paddle.fluid.core as core
+import paddle.fluid as fluid


-class TestPixelShuffle(OpTest):
-    def setUp(self):
-        self.op_type = "pixel_shuffle"
-        n, c, h, w = 2, 9, 4, 4
-        up_factor = 3
-        shape = [n, c, h, w]
-        x = np.random.random(shape).astype("float64")
+def pixel_shuffle_np(x, up_factor, data_format="NCHW"):
+    if data_format == "NCHW":
+        n, c, h, w = x.shape
         new_shape = (n, c // (up_factor * up_factor), up_factor, up_factor, h,
                      w)
         # reshape to (num,output_channel,upscale_factor,upscale_factor,h,w)
...@@ -34,10 +35,42 @@ class TestPixelShuffle(OpTest):
         npresult = npresult.transpose(0, 1, 4, 2, 5, 3)
         oshape = [n, c // (up_factor * up_factor), h * up_factor, w * up_factor]
         npresult = np.reshape(npresult, oshape)
+        return npresult
+    else:
+        n, h, w, c = x.shape
+        new_shape = (n, h, w, c // (up_factor * up_factor), up_factor,
+                     up_factor)
+        # reshape to (num,h,w,output_channel,upscale_factor,upscale_factor)
+        npresult = np.reshape(x, new_shape)
+        # transpose to (num,h,upscale_factor,w,upscale_factor,output_channel)
+        npresult = npresult.transpose(0, 1, 4, 2, 5, 3)
+        oshape = [n, h * up_factor, w * up_factor, c // (up_factor * up_factor)]
+        npresult = np.reshape(npresult, oshape)
+        return npresult
+
+
+class TestPixelShuffleOp(OpTest):
+    def setUp(self):
+        self.op_type = "pixel_shuffle"
+        self.init_data_format()
+        n, c, h, w = 2, 9, 4, 4
+        if self.format == "NCHW":
+            shape = [n, c, h, w]
+        if self.format == "NHWC":
+            shape = [n, h, w, c]
+        up_factor = 3
+        x = np.random.random(shape).astype("float64")
+        npresult = pixel_shuffle_np(x, up_factor, self.format)
         self.inputs = {'X': x}
         self.outputs = {'Out': npresult}
-        self.attrs = {'upscale_factor': up_factor}
+        self.attrs = {'upscale_factor': up_factor, "data_format": self.format}
+
+    def init_data_format(self):
+        self.format = "NCHW"

     def test_check_output(self):
         self.check_output()
...@@ -46,5 +79,141 @@ class TestPixelShuffle(OpTest):
         self.check_grad(['X'], 'Out')
+class TestChannelLast(TestPixelShuffleOp):
+    def init_data_format(self):
+        self.format = "NHWC"
+
+
+class TestPixelShuffleAPI(unittest.TestCase):
+    def setUp(self):
+        self.x_1_np = np.random.random([2, 9, 4, 4]).astype("float64")
+        self.x_2_np = np.random.random([2, 4, 4, 9]).astype("float64")
+        self.out_1_np = pixel_shuffle_np(self.x_1_np, 3)
+        self.out_2_np = pixel_shuffle_np(self.x_2_np, 3, "NHWC")
+
+    def test_static_graph_functional(self):
+        for use_cuda in ([False, True]
+                         if core.is_compiled_with_cuda() else [False]):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+
+            paddle.enable_static()
+            x_1 = paddle.data(name="x", shape=[2, 9, 4, 4], dtype="float64")
+            x_2 = paddle.data(name="x2", shape=[2, 4, 4, 9], dtype="float64")
+            out_1 = F.pixel_shuffle(x_1, 3)
+            out_2 = F.pixel_shuffle(x_2, 3, "NHWC")
+
+            exe = paddle.static.Executor(place=place)
+            res_1 = exe.run(fluid.default_main_program(),
+                            feed={"x": self.x_1_np},
+                            fetch_list=out_1,
+                            use_prune=True)
+
+            res_2 = exe.run(fluid.default_main_program(),
+                            feed={"x2": self.x_2_np},
+                            fetch_list=out_2,
+                            use_prune=True)
+
+            assert np.allclose(res_1, self.out_1_np)
+            assert np.allclose(res_2, self.out_2_np)
+
+    # same test between layer and functional in this op.
+    def test_static_graph_layer(self):
+        for use_cuda in ([False, True]
+                         if core.is_compiled_with_cuda() else [False]):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+
+            paddle.enable_static()
+            x_1 = paddle.data(name="x", shape=[2, 9, 4, 4], dtype="float64")
+            x_2 = paddle.data(name="x2", shape=[2, 4, 4, 9], dtype="float64")
+            # init instance
+            ps_1 = paddle.nn.PixelShuffle(3)
+            ps_2 = paddle.nn.PixelShuffle(3, "NHWC")
+            out_1 = ps_1(x_1)
+            out_2 = ps_2(x_2)
+            out_1_np = pixel_shuffle_np(self.x_1_np, 3)
+            out_2_np = pixel_shuffle_np(self.x_2_np, 3, "NHWC")
+
+            exe = paddle.static.Executor(place=place)
+            res_1 = exe.run(fluid.default_main_program(),
+                            feed={"x": self.x_1_np},
+                            fetch_list=out_1,
+                            use_prune=True)
+
+            res_2 = exe.run(fluid.default_main_program(),
+                            feed={"x2": self.x_2_np},
+                            fetch_list=out_2,
+                            use_prune=True)
+
+            assert np.allclose(res_1, out_1_np)
+            assert np.allclose(res_2, out_2_np)
+
+    def run_dygraph(self, up_factor, data_format):
+        n, c, h, w = 2, 9, 4, 4
+        if data_format == "NCHW":
+            shape = [n, c, h, w]
+        if data_format == "NHWC":
+            shape = [n, h, w, c]
+
+        x = np.random.random(shape).astype("float64")
+        npresult = pixel_shuffle_np(x, up_factor, data_format)
+
+        for use_cuda in ([False, True]
+                         if core.is_compiled_with_cuda() else [False]):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+
+            paddle.disable_static(place=place)
+            pixel_shuffle = paddle.nn.PixelShuffle(
+                up_factor, data_format=data_format)
+            result = pixel_shuffle(paddle.to_tensor(x))
+            self.assertTrue(np.allclose(result.numpy(), npresult))
+
+            result_functional = F.pixel_shuffle(
+                paddle.to_tensor(x), 3, data_format)
+            self.assertTrue(np.allclose(result_functional.numpy(), npresult))
+
+    def test_dygraph1(self):
+        self.run_dygraph(3, "NCHW")
+
+    def test_dygraph2(self):
+        self.run_dygraph(3, "NHWC")
+
+
+class TestPixelShuffleError(unittest.TestCase):
+    def test_error_functional(self):
+        def error_upscale_factor():
+            with paddle.fluid.dygraph.guard():
+                x = np.random.random([2, 9, 4, 4]).astype("float64")
+                pixel_shuffle = F.pixel_shuffle(paddle.to_tensor(x), 3.33)
+
+        self.assertRaises(TypeError, error_upscale_factor)
+
+        def error_data_format():
+            with paddle.fluid.dygraph.guard():
+                x = np.random.random([2, 9, 4, 4]).astype("float64")
+                pixel_shuffle = F.pixel_shuffle(paddle.to_tensor(x), 3, "WOW")
+
+        self.assertRaises(ValueError, error_data_format)
+
+    def test_error_layer(self):
+        def error_upscale_factor_layer():
+            with paddle.fluid.dygraph.guard():
+                x = np.random.random([2, 9, 4, 4]).astype("float64")
+                ps = paddle.nn.PixelShuffle(3.33)
+
+        self.assertRaises(TypeError, error_upscale_factor_layer)
+
+        def error_data_format_layer():
+            with paddle.fluid.dygraph.guard():
+                x = np.random.random([2, 9, 4, 4]).astype("float64")
+                ps = paddle.nn.PixelShuffle(3, "MEOW")
+
+        self.assertRaises(ValueError, error_data_format_layer)
+
+
 if __name__ == '__main__':
     unittest.main()
...@@ -139,7 +139,10 @@ from .layer.transformer import TransformerDecoder
 from .layer.transformer import Transformer
 from .layer.distance import PairwiseDistance  #DEFINE_ALIAS
+from .layer.vision import PixelShuffle
 from .layer import loss  #DEFINE_ALIAS
 from .layer import conv  #DEFINE_ALIAS
+from .layer import vision  #DEFINE_ALIAS
 from ..fluid.dygraph.layers import Layer  #DEFINE_ALIAS
 from ..fluid.dygraph.container import LayerList, ParameterList, Sequential  #DEFINE_ALIAS
...@@ -194,7 +194,7 @@ from .vision import box_clip  #DEFINE_ALIAS
 from .vision import box_coder  #DEFINE_ALIAS
 from .vision import box_decoder_and_assign  #DEFINE_ALIAS
 from .vision import collect_fpn_proposals  #DEFINE_ALIAS
 # from .vision import deformable_conv  #DEFINE_ALIAS
 from .vision import deformable_roi_pooling  #DEFINE_ALIAS
 from .vision import density_prior_box  #DEFINE_ALIAS
 from .vision import detection_output  #DEFINE_ALIAS
...@@ -206,7 +206,7 @@ from .vision import generate_proposals  #DEFINE_ALIAS
 from .vision import grid_sample  #DEFINE_ALIAS
 from .vision import image_resize  #DEFINE_ALIAS
 from .vision import image_resize_short  #DEFINE_ALIAS
 # from .vision import multi_box_head  #DEFINE_ALIAS
 from .vision import pixel_shuffle  #DEFINE_ALIAS
 from .vision import prior_box  #DEFINE_ALIAS
 from .vision import prroi_pool  #DEFINE_ALIAS
......
...@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

 # TODO: define special functions used in computer vision task
+from ...fluid.data_feeder import check_variable_and_dtype
+from ...fluid.layer_helper import LayerHelper
+from ...fluid.framework import core, in_dygraph_mode
 from ...fluid.layers import affine_channel  #DEFINE_ALIAS
 from ...fluid.layers import affine_grid  #DEFINE_ALIAS
 from ...fluid.layers import anchor_generator  #DEFINE_ALIAS
...@@ -28,6 +31,7 @@ from ...fluid.layers import distribute_fpn_proposals  #DEFINE_ALIAS
 from ...fluid.layers import generate_mask_labels  #DEFINE_ALIAS
 from ...fluid.layers import generate_proposal_labels  #DEFINE_ALIAS
 from ...fluid.layers import generate_proposals  #DEFINE_ALIAS
+from ...fluid.layers import grid_sampler  #DEFINE_ALIAS
 from ...fluid.layers import image_resize  #DEFINE_ALIAS
 from ...fluid.layers import prior_box  #DEFINE_ALIAS
 from ...fluid.layers import prroi_pool  #DEFINE_ALIAS
...@@ -43,7 +47,7 @@ from ...fluid.layers import yolov3_loss  #DEFINE_ALIAS
 from ...fluid.layers import fsp_matrix  #DEFINE_ALIAS
 from ...fluid.layers import image_resize_short  #DEFINE_ALIAS
-from ...fluid.layers import pixel_shuffle  #DEFINE_ALIAS
+# from ...fluid.layers import pixel_shuffle  #DEFINE_ALIAS
 from ...fluid.layers import retinanet_detection_output  #DEFINE_ALIAS
 from ...fluid.layers import retinanet_target_assign  #DEFINE_ALIAS
 from ...fluid.layers import roi_perspective_transform  #DEFINE_ALIAS
...@@ -67,7 +71,7 @@ __all__ = [
     'generate_mask_labels',
     'generate_proposal_labels',
     'generate_proposals',
-    'grid_sample',
+    'grid_sampler',
     'image_resize',
     'image_resize_short',
     #     'multi_box_head',
...@@ -89,8 +93,6 @@ __all__ = [
     'yolov3_loss'
 ]

-from ...fluid.layer_helper import LayerHelper
-from ...fluid.data_feeder import check_variable_and_dtype
 from ...fluid import core, dygraph_utils
 from ...fluid.framework import Variable, in_dygraph_mode
 from ...device import get_cudnn_version
...@@ -112,22 +114,16 @@ def grid_sample(x,
     data x and y is indexing the 3rd dimension (in height dimension),
     finally results is the bilinear interpolation or nearest value of 4 nearest corner
     points. The output tensor shape will be [N, C, H, W].
     .. code-block:: text

         Step 1:
         Get (x, y) grid coordinates and scale to [0, H-1/W-1].

         grid_x = 0.5 * (grid[:, :, :, 0] + 1) * (W - 1)
         grid_y = 0.5 * (grid[:, :, :, 1] + 1) * (H - 1)

         Step 2:
         Indices input data X with grid (x, y) in each [H, W] area, and bilinear
         interpolate point value by 4 nearest points or nearest interpolate point value
         by nearest point.

           wn ------- y_n ------- en
           |           |           |
           |          d_n          |
...@@ -137,27 +133,21 @@ def grid_sample(x,
           |          d_s          |
           |           |           |
           ws ------- y_s ------- es

         For bilinear interpolation:
         x_w = floor(x)     // west side x coord
         x_e = x_w + 1      // east side x coord
         y_n = floor(y)     // north side y coord
         y_s = y_n + 1      // south side y coord
         d_w = grid_x - x_w // distance to west side
         d_e = x_e - grid_x // distance to east side
         d_n = grid_y - y_n // distance to north side
         d_s = y_s - grid_y // distance to south side
         wn = X[:, :, y_n, x_w] // north-west point value
         en = X[:, :, y_n, x_e] // north-east point value
         ws = X[:, :, y_s, x_w] // south-west point value
         es = X[:, :, y_s, x_e] // south-east point value

         output = wn * d_e * d_s + en * d_w * d_s
                + ws * d_e * d_n + es * d_w * d_n
     Args:
         x(Tensor): The input tensor, which is a 4-d tensor with shape
                    [N, C, H, W], N is the batch size, C is the channel
...@@ -176,14 +166,10 @@ def grid_sample(x,
         name(str, optional): For detailed information, please refer
                              to :ref:`api_guide_Name`. Usually name is no need to set and
                              None by default.

     Returns: Tensor, The shape of output is [N, C, grid_H, grid_W] in which `grid_H` is the height of grid
              and `grid_W` is the width of grid. The data type is same as input tensor.

     Examples:
         .. code-block:: python

             import paddle
             import paddle.nn.functional as F
             import numpy as np
...@@ -272,3 +258,57 @@ def grid_sample(x,
                 attrs=attrs,
                 outputs={'Output': out})
     return out
+
+
+def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None):
+    """
+    This API implements pixel shuffle operation.
+    See more details in :ref:`api_nn_vision_PixelShuffle` .
+
+    Parameters:
+        x(Tensor): 4-D tensor, the data type should be float32 or float64.
+        upscale_factor(int): factor to increase spatial resolution.
+        data_format (str): The data format of the input and output data. An optional string from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in the order of: [batch_size, input_channels, input_height, input_width].
+        name (str, optional): The default value is None. Normally there is no need for user to set this property.
+
+    Returns:
+        Out(tensor): Reshaped tensor according to the new dimension.
+
+    Raises:
+        ValueError: If the square of upscale_factor cannot divide the channels of input.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import paddle.nn.functional as F
+            import numpy as np
+
+            x = np.random.randn(2, 9, 4, 4).astype(np.float32)
+
+            paddle.disable_static()
+            x_var = paddle.to_tensor(x)
+            out_var = F.pixel_shuffle(x_var, 3)
+            out = out_var.numpy()
+            print(out.shape)
+            # (2, 1, 12, 12)
+    """
+    if not in_dygraph_mode():
+        check_variable_and_dtype(x, 'x', ['float32', 'float64'],
+                                 'pixel_shuffle')
+
+    if not isinstance(upscale_factor, int):
+        raise TypeError("upscale factor must be int type")
+
+    if data_format not in ["NCHW", "NHWC"]:
+        raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'. "
+                         "But received Attr(data_format): {} ".format(
+                             data_format))
+
+    if in_dygraph_mode():
+        return core.ops.pixel_shuffle(x, "upscale_factor", upscale_factor,
+                                      "data_format", data_format)
+
+    helper = LayerHelper("pixel_shuffle", **locals())
+    out = helper.create_variable_for_type_inference(dtype=x.dtype)
+    helper.append_op(
+        type="pixel_shuffle",
+        inputs={"X": x},
+        outputs={"Out": out},
+        attrs={"upscale_factor": upscale_factor,
+               "data_format": data_format})
+    return out
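For the channel-last path this commit adds, a hedged usage sketch (dygraph-era API as used elsewhere in this commit; the shapes follow the NHWC rule above):

import numpy as np
import paddle
import paddle.nn.functional as F

paddle.disable_static()
x = np.random.randn(2, 4, 4, 9).astype(np.float32)   # NHWC: channels last
out = F.pixel_shuffle(paddle.to_tensor(x), 3, data_format="NHWC")
print(out.numpy().shape)  # expected: (2, 12, 12, 1)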
...@@ -20,6 +20,7 @@ from . import conv
 from . import extension
 from . import activation
 from . import norm
+from . import vision
 from . import distance
 from . import transformer
...@@ -29,6 +30,8 @@ from .conv import *
 from .extension import *
 from .activation import *
 from .norm import *
+from .vision import *
 from .transformer import *
 # from .activation import PReLU  #DEFINE_ALIAS
 from .activation import ReLU  #DEFINE_ALIAS
...@@ -104,4 +107,6 @@ from .norm import InstanceNorm  #DEFINE_ALIAS
 # from .rnn import RNNCell  #DEFINE_ALIAS
 # from .rnn import GRUCell  #DEFINE_ALIAS
 # from .rnn import LSTMCell  #DEFINE_ALIAS
+from .vision import PixelShuffle  #DEFINE_ALIAS
 from .distance import PairwiseDistance  #DEFINE_ALIAS
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO: define special functions used in computer vision task
from ...fluid.dygraph import layers
from .. import functional
__all__ = ['PixelShuffle']
class PixelShuffle(layers.Layer):
    """
    PixelShuffle Layer

    This operator rearranges elements in a tensor of shape [N, C, H, W]
    to a tensor of shape [N, C/upscale_factor**2, H*upscale_factor, W*upscale_factor],
    or from shape [N, H, W, C] to [N, H*upscale_factor, W*upscale_factor, C/upscale_factor**2].
    This is useful for implementing efficient sub-pixel convolution
    with a stride of 1/upscale_factor.
    Please refer to the paper `Real-Time Single Image and Video Super-Resolution
    Using an Efficient Sub-Pixel Convolutional Neural Network <https://arxiv.org/abs/1609.05158v2>`_
    by Shi et al. (2016) for more details.

    Parameters:
        upscale_factor(int): factor to increase spatial resolution.
        data_format (str): The data format of the input and output data. An optional string from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in the order of: [batch_size, input_channels, input_height, input_width].
        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.

    Shape:
        - x: 4-D tensor with shape: (N, C, H, W) or (N, H, W, C).
        - out: 4-D tensor with shape: (N, C/upscale_factor**2, H*upscale_factor, W*upscale_factor) or (N, H*upscale_factor, W*upscale_factor, C/upscale_factor**2).

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn
            import numpy as np

            paddle.disable_static()
            x = np.random.randn(2, 9, 4, 4).astype(np.float32)
            x_var = paddle.to_tensor(x)
            pixel_shuffle = nn.PixelShuffle(3)
            out_var = pixel_shuffle(x_var)
            out = out_var.numpy()
            print(out.shape)
            # (2, 1, 12, 12)
    """

    def __init__(self, upscale_factor, data_format="NCHW", name=None):
        super(PixelShuffle, self).__init__()

        if not isinstance(upscale_factor, int):
            raise TypeError("upscale factor must be int type")

        if data_format not in ["NCHW", "NHWC"]:
            raise ValueError("Data format should be 'NCHW' or 'NHWC'. "
                             "But received data format: {}".format(data_format))

        self._upscale_factor = upscale_factor
        self._data_format = data_format
        self._name = name

    def forward(self, x):
        return functional.pixel_shuffle(x, self._upscale_factor,
                                        self._data_format, self._name)