From 48cc4846430eefcd0d1b03349b982675ce853091 Mon Sep 17 00:00:00 2001 From: tink2123 Date: Wed, 23 Jan 2019 19:27:55 +0800 Subject: [PATCH] add align_corners and align_mode for image_resize test=develop --- paddle/fluid/operators/interpolate_op.cc | 73 ++++++ paddle/fluid/operators/interpolate_op.cu | 96 +++++--- paddle/fluid/operators/interpolate_op.h | 102 ++++++--- python/paddle/fluid/layers/nn.py | 207 +++++++++++++++++- .../unittests/test_bilinear_interp_op.py | 94 ++++++-- .../tests/unittests/test_nearest_interp_op.py | 57 ++++- 6 files changed, 529 insertions(+), 100 deletions(-) diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index 93dd3f794..1b34d404c 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -82,6 +82,18 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker { "bilinear interpolation and \"nearest\" for nearest " "neighbor interpolation.") .SetDefault("bilinear"); + AddAttr( + "align_corners", + "an optinal bool. Defaults to True. " + "If True, the centers of 4 corner pixels of the input and output " + "tensors are aligned, preserving the values at the corner pixels, " + "if Flase, are not aligned") + .SetDefault(true); + AddAttr("align_mode", + "(int, default \'0\'), align_corners mode , can be \'0\' " + "for pytorch calculation method, can be \'1\' for " + "tensorflow calculation method.") + .SetDefault(0); AddComment(R"DOC( This operator samples input X to given output shape by using specified interpolation method, the interpolation methods can be \"nearest\" @@ -98,6 +110,67 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker { to perform linear interpolation first in one direction, and then again in the other direction. + Align_corners and align_mode are optinal parameters,The calculation method + of interpolation can be selected by them. + + Example: + + for scale: + + if align_corners = True and out_{size}>1 : + + scale_{factor} = (in_{size}-1.0)/(out_{size}-1.0) + + else: + + scale_{factor} = float(in_{size}/out_{size}) + + + Nearest neighbor interpolation: + + case 1: + align_corners = False + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor + W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor + + case 2: + align_corners = True + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = round(H_{in} * scale_{factor}) + W_out = round(W_{in} * scale_{factor}) + + Bilinear interpolation: + + case 1: + align_corners = False , align_mode = 0 + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = (H_{in}+0.5) * scale_{factor} - 0.5 + W_out = (W_{in}+0.5) * scale_{factor} - 0.5 + + + case 2: + align_corners = False , align_mode = 1 + or + align_corners = True + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = H_{in} * scale_{factor} + W_out = W_{in} * scale_{factor} + + + For details of nearest neighbor interpolation, please refer to Wikipedia: https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index 99ac725f7..316811d23 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -23,7 +23,8 @@ __global__ void KeNearestNeighborInterpFw( const T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h, const size_t input_w, T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, - const size_t num_channels, const float ratio_h, const float ratio_w) { + const size_t num_channels, const float ratio_h, const float ratio_w, + const bool align_corners) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -35,10 +36,14 @@ __global__ void KeNearestNeighborInterpFw( int channel_id = out_id_w / out_img_size; int out_img_idy = (out_id_w % out_img_size) / out_img_w; - int in_img_idy = static_cast(ratio_h * out_img_idy + 0.5); + int in_img_idy = (align_corners) + ? static_cast(ratio_h * out_img_idy + 0.5) + : static_cast(ratio_h * out_img_idy); int out_img_idx = tid % out_img_w; - int in_img_idx = static_cast(ratio_w * out_img_idx + 0.5); + int in_img_idx = (align_corners) + ? static_cast(ratio_w * out_img_idx + 0.5) + : static_cast(ratio_w * out_img_idx); out[tid] = in[out_id_h * input_w + channel_id * in_img_size + in_img_idy * in_img_w + in_img_idx]; @@ -50,7 +55,8 @@ __global__ void KeNearestNeighborInterpBw( T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h, const size_t input_w, const T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, - const size_t num_channels, const float ratio_h, const float ratio_w) { + const size_t num_channels, const float ratio_h, const float ratio_w, + const bool align_corners) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -62,10 +68,14 @@ __global__ void KeNearestNeighborInterpBw( int channel_id = out_id_w / out_img_size; int out_img_idy = (out_id_w % out_img_size) / out_img_w; - int in_img_idy = static_cast(ratio_h * out_img_idy + 0.5); + int in_img_idy = (align_corners) + ? static_cast(ratio_h * out_img_idy + 0.5) + : static_cast(ratio_h * out_img_idy); int out_img_idx = tid % out_img_w; - int in_img_idx = static_cast(ratio_w * out_img_idx + 0.5); + int in_img_idx = (align_corners) + ? static_cast(ratio_w * out_img_idx + 0.5) + : static_cast(ratio_w * out_img_idx); T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size + in_img_idy * in_img_w + in_img_idx]; @@ -79,7 +89,8 @@ __global__ void KeBilinearInterpFw( const T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h, const size_t input_w, T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, - const size_t num_channels, const float ratio_h, const float ratio_w) { + const size_t num_channels, const float ratio_h, const float ratio_w, + const bool align_corners, const int align_mode) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -91,15 +102,23 @@ __global__ void KeBilinearInterpFw( int channel_id = out_id_w / out_img_size; int out_img_idy = (out_id_w % out_img_size) / out_img_w; - int in_img_idy = ratio_h * out_img_idy; + int in_img_idy = (align_mode == 0 && !align_corners) + ? static_cast(ratio_h * (out_img_idy + 0.5) - 0.5) + : static_cast(ratio_h * out_img_idy); int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0; - T h1lambda = ratio_h * out_img_idy - in_img_idy; + T h1lambda = (align_mode == 0 && !align_corners) + ? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy + : ratio_h * out_img_idy - in_img_idy; T h2lambda = 1.f - h1lambda; int out_img_idx = tid % out_img_w; - int in_img_idx = ratio_w * out_img_idx; + int in_img_idx = (align_mode == 0 && !align_corners) + ? static_cast(ratio_w * (out_img_idx + 0.5) - 0.5) + : static_cast(ratio_w * out_img_idx); int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0; - T w1lambda = ratio_w * out_img_idx - in_img_idx; + T w1lambda = (align_mode == 0 && !align_corners) + ? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx + : ratio_w * out_img_idx - in_img_idx; T w2lambda = 1.f - w1lambda; const T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size + @@ -118,7 +137,8 @@ __global__ void KeBilinearInterpBw( T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h, const size_t input_w, const T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, - const size_t num_channels, const T ratio_h, const T ratio_w) { + const size_t num_channels, const T ratio_h, const T ratio_w, + const bool align_corners, const int align_mode) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -130,15 +150,24 @@ __global__ void KeBilinearInterpBw( int channel_id = out_id_w / out_img_size; int out_img_idy = (out_id_w % out_img_size) / out_img_w; - int in_img_idy = ratio_h * out_img_idy; + int in_img_idy = (align_mode == 0 && !align_corners) + ? ratio_h * (out_img_idy + 0.5) - 0.5 + : ratio_h * out_img_idy; int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0; - T h1lambda = ratio_h * out_img_idy - in_img_idy; + T h1lambda = (align_mode == 0 && !align_corners) + ? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy + : ratio_h * out_img_idy - in_img_idy; + T h2lambda = 1.f - h1lambda; int out_img_idx = tid % out_img_w; - int in_img_idx = ratio_w * out_img_idx; + int in_img_idx = (align_mode == 0 && !align_corners) + ? ratio_w * (out_img_idx + 0.5) - 0.5 + : ratio_w * out_img_idx; int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0; - T w1lambda = ratio_w * out_img_idx - in_img_idx; + T w1lambda = (align_mode == 0 && !align_corners) + ? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx + : ratio_w * out_img_idx - in_img_idx; T w2lambda = 1.f - w1lambda; T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size + @@ -175,6 +204,9 @@ class InterpolateOpCUDAKernel : public framework::OpKernel { out_w = size_data[1]; } + bool align_corners = ctx.Attr("align_corners"); + int align_mode = ctx.Attr("align_mode"); + int n = input->dims()[0]; int c = input->dims()[1]; int in_h = input->dims()[2]; @@ -188,10 +220,12 @@ class InterpolateOpCUDAKernel : public framework::OpKernel { int in_chw = c * in_hw; int out_chw = c * out_hw; - float ratio_h = - (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; - float ratio_w = - (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; + float ratio_h = (align_corners && out_h > 1) + ? static_cast(in_h - 1) / (out_h - 1) + : static_cast(in_h) / out_h; + float ratio_w = (align_corners && out_w > 1) + ? static_cast(in_w - 1) / (out_w - 1) + : static_cast(in_w) / out_w; if (in_h == out_h && in_w == out_w) { framework::TensorCopy(*input, ctx.GetPlace(), output); @@ -206,12 +240,12 @@ class InterpolateOpCUDAKernel : public framework::OpKernel { KeNearestNeighborInterpFw< T><<>>( input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, - out_chw, c, ratio_h, ratio_w); + out_chw, c, ratio_h, ratio_w, align_corners); } else if ("bilinear" == interp_method) { KeBilinearInterpFw< T><<>>( input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, - out_chw, c, ratio_h, ratio_w); + out_chw, c, ratio_h, ratio_w, align_corners, align_mode); } } }; @@ -234,6 +268,10 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel { int out_h = ctx.Attr("out_h"); int out_w = ctx.Attr("out_w"); auto out_size = ctx.Input("OutSize"); + + bool align_corners = ctx.Attr("align_corners"); + int align_mode = ctx.Attr("align_mode"); + if (out_size != nullptr) { Tensor sizes; framework::TensorCopy(*out_size, platform::CPUPlace(), &sizes); @@ -252,10 +290,12 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel { int in_chw = c * in_hw; int out_chw = c * out_hw; - float ratio_h = - (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; - float ratio_w = - (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; + float ratio_h = (align_corners && out_h > 1) + ? static_cast(in_h - 1) / (out_h - 1) + : static_cast(in_h) / out_h; + float ratio_w = (align_corners && out_w > 1) + ? static_cast(in_w - 1) / (out_w - 1) + : static_cast(in_w) / out_w; if (in_h == out_h && in_w == out_w) { framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad); @@ -270,12 +310,12 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel { KeNearestNeighborInterpBw< T><<>>( input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, - out_w, n, out_chw, c, ratio_h, ratio_w); + out_w, n, out_chw, c, ratio_h, ratio_w, align_corners); } else if ("bilinear" == interp_method) { KeBilinearInterpBw< T><<>>( input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, - out_w, n, out_chw, c, ratio_h, ratio_w); + out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, align_mode); } } }; diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h index 7fdb3e1f5..95aec33ee 100644 --- a/paddle/fluid/operators/interpolate_op.h +++ b/paddle/fluid/operators/interpolate_op.h @@ -26,14 +26,17 @@ template static void NearestNeighborInterpolate(const Tensor& input, Tensor* output, const float ratio_h, const float ratio_w, const int n, const int c, - const int out_h, const int out_w) { + const int out_h, const int out_w, + const bool align_corners) { auto input_t = EigenTensor::From(input); auto output_t = EigenTensor::From(*output); for (int k = 0; k < out_h; k++) { // loop for images - int in_k = static_cast(ratio_h * k + 0.5); + int in_k = (align_corners) ? static_cast(ratio_h * k + 0.5) + : static_cast(ratio_h * k); for (int l = 0; l < out_w; l++) { - int in_l = static_cast(ratio_w * l + 0.5); + int in_l = (align_corners) ? static_cast(ratio_w * l + 0.5) + : static_cast(ratio_w * l); for (int i = 0; i < n; i++) { // loop for batches for (int j = 0; j < c; j++) { // loop for channels @@ -48,20 +51,29 @@ template static void BilinearInterpolation(const Tensor& input, Tensor* output, const float ratio_h, const float ratio_w, const int in_h, const int in_w, const int n, - const int c, const int out_h, - const int out_w) { + const int c, const int out_h, const int out_w, + const bool align_corners, + const bool align_mode) { auto input_t = EigenTensor::From(input); auto output_t = EigenTensor::From(*output); for (int k = 0; k < out_h; k++) { // loop for images - int y_n = static_cast(ratio_h * k); + int y_n = (align_mode == 0 && !align_corners) + ? static_cast(ratio_h * (k + 0.5) - 0.5) + : static_cast(ratio_h * k); int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1); - float d_n = ratio_h * k - y_n; + float d_n = (align_mode == 0 && !align_corners) + ? ratio_h * (k + 0.5) - 0.5 - y_n + : ratio_h * k - y_n; float d_s = 1.f - d_n; for (int l = 0; l < out_w; l++) { - int x_w = static_cast(ratio_w * l); + int x_w = (align_mode == 0 && !align_corners) + ? static_cast(ratio_w * (l + 0.5) - 0.5) + : static_cast(ratio_w * l); int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1); - float d_w = ratio_w * l - x_w; + float d_w = (align_mode == 0 && !align_corners) + ? ratio_w * (l + 0.5) - 0.5 - x_w + : ratio_w * l - x_w; float d_e = 1.f - d_w; for (int i = 0; i < n; i++) { // loop for batches @@ -78,19 +90,20 @@ static void BilinearInterpolation(const Tensor& input, Tensor* output, } template -static void NearestNeighborInterpolateGrad(const Tensor& output_grad, - Tensor* input_grad, - const float ratio_h, - const float ratio_w, const int n, - const int c, const int out_h, - const int out_w) { +static void NearestNeighborInterpolateGrad( + const Tensor& output_grad, Tensor* input_grad, const float ratio_h, + const float ratio_w, const int n, const int c, const int out_h, + const int out_w, const bool align_corners) { auto input_grad_t = EigenTensor::From(*input_grad); auto output_grad_t = EigenTensor::From(output_grad); + for (int k = 0; k < out_h; k++) { // loop for images - int in_k = static_cast(ratio_h * k + 0.5); + int in_k = (align_corners) ? static_cast(ratio_h * k + 0.5) + : static_cast(ratio_h * k); for (int l = 0; l < out_w; l++) { - int in_l = static_cast(ratio_w * l + 0.5); + int in_l = (align_corners) ? static_cast(ratio_w * l + 0.5) + : static_cast(ratio_w * l); for (int i = 0; i < n; i++) { // loop for batches for (int j = 0; j < c; j++) { // loop for channels @@ -106,19 +119,29 @@ static void BilinearInterpolationGrad(const Tensor& output_grad, Tensor* input_grad, const float ratio_h, const float ratio_w, const int in_h, const int in_w, const int n, const int c, - const int out_h, const int out_w) { + const int out_h, const int out_w, + const bool align_corners, + const int align_mode) { auto input_grad_t = EigenTensor::From(*input_grad); auto output_grad_t = EigenTensor::From(output_grad); for (int k = 0; k < out_h; k++) { // loop for images - int y_n = static_cast(ratio_h * k); + int y_n = (align_mode == 0 && !align_corners) + ? static_cast(ratio_h * (k + 0.5) - 0.5) + : static_cast(ratio_h * k); int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1); - float d_n = ratio_h * k - y_n; + float d_n = (align_mode == 0 && !align_corners) + ? ratio_h * (k + 0.5) - 0.5 - y_n + : ratio_h * k - y_n; float d_s = 1.f - d_n; for (int l = 0; l < out_w; l++) { - int x_w = static_cast(ratio_w * l); + int x_w = (align_mode == 0 && !align_corners) + ? static_cast(ratio_w * (l + 0.5) - 0.5) + : static_cast(ratio_w * l); int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1); - float d_w = ratio_w * l - x_w; + float d_w = (align_mode == 0 && !align_corners) + ? ratio_w * (l + 0.5) - 0.5 - x_w + : ratio_w * l - x_w; float d_e = 1.f - d_w; for (int i = 0; i < n; i++) { // loop for batches @@ -134,7 +157,6 @@ static void BilinearInterpolationGrad(const Tensor& output_grad, } } } - template class InterpolateKernel : public framework::OpKernel { public: @@ -151,6 +173,8 @@ class InterpolateKernel : public framework::OpKernel { out_h = out_size_data[0]; out_w = out_size_data[1]; } + bool align_corners = ctx.Attr("align_corners"); + int align_mode = ctx.Attr("align_mode"); const int n = input->dims()[0]; const int c = input->dims()[1]; @@ -168,17 +192,19 @@ class InterpolateKernel : public framework::OpKernel { return; } - float ratio_h = - (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; - float ratio_w = - (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; + float ratio_h = (align_corners && out_h > 1) + ? static_cast(in_h - 1) / (out_h - 1) + : static_cast(in_h) / out_h; + float ratio_w = (align_corners && out_w > 1) + ? static_cast(in_w - 1) / (out_w - 1) + : static_cast(in_w) / out_w; if ("bilinear" == interp_method) { BilinearInterpolation(*input, output, ratio_h, ratio_w, in_h, in_w, n, - c, out_h, out_w); + c, out_h, out_w, align_corners, align_mode); } else if ("nearest" == interp_method) { NearestNeighborInterpolate(*input, output, ratio_h, ratio_w, n, c, - out_h, out_w); + out_h, out_w, align_corners); } } }; @@ -200,6 +226,8 @@ class InterpolateGradKernel : public framework::OpKernel { out_h = out_size_data[0]; out_w = out_size_data[1]; } + bool align_corners = ctx.Attr("align_corners"); + int align_mode = ctx.Attr("align_mode"); const int n = input->dims()[0]; const int c = input->dims()[1]; @@ -217,17 +245,21 @@ class InterpolateGradKernel : public framework::OpKernel { return; } - float ratio_h = - (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; - float ratio_w = - (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; + float ratio_h = (align_corners && out_h > 1) + ? static_cast(in_h - 1) / (out_h - 1) + : static_cast(in_h) / out_h; + float ratio_w = (align_corners && out_w > 1) + ? static_cast(in_w - 1) / (out_w - 1) + : static_cast(in_w) / out_w; if ("bilinear" == interp_method) { BilinearInterpolationGrad(*output_grad, input_grad, ratio_h, ratio_w, - in_h, in_w, n, c, out_h, out_w); + in_h, in_w, n, c, out_h, out_w, + align_corners, align_mode); } else if ("nearest" == interp_method) { NearestNeighborInterpolateGrad(*output_grad, input_grad, ratio_h, - ratio_w, n, c, out_h, out_w); + ratio_w, n, c, out_h, out_w, + align_corners); } } }; diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 56971cff4..93e77dc11 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -913,7 +913,7 @@ def dynamic_gru(input, create ParamAttr as param_attr. If the Initializer of the param_attr is not set, the parameter is initialized with Xavier. Default: None. bias_attr (ParamAttr|bool|None): The parameter attribute for the bias - of GRU. Note that the bias with :math:`(1 \\times 3D)` concatenates + of GRU.Note that the bias with :math:`(1 \\times 3D)` concatenates the bias in the update gate, reset gate and candidate calculations. If it is set to False, no bias will be applied to the update gate, reset gate and candidate calculations. If it is set to None or one @@ -1034,7 +1034,7 @@ def gru_unit(input, create ParamAttr as param_attr. If the Initializer of the param_attr is not set, the parameter is initialized with Xavier. Default: None. bias_attr (ParamAttr|bool|None): The parameter attribute for the bias - of GRU. Note that the bias with :math:`(1 \\times 3D)` concatenates + of GRU.Note that the bias with :math:`(1 \\times 3D)` concatenates the bias in the update gate, reset gate and candidate calculations. If it is set to False, no bias will be applied to the update gate, reset gate and candidate calculations. If it is set to None or one @@ -5350,7 +5350,7 @@ def transpose(x, perm, name=None): Examples: .. code-block:: python - # use append_batch_size=False to avoid prepending extra + # use append_batch_size=False to avoid prepending extra # batch size in shape x = fluid.layers.data(name='x', shape=[5, 10, 15], dtype='float32', append_batch_size=False) @@ -5866,7 +5866,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): than :attr:`shape`. act (str): The non-linear activation to be applied to the reshaped tensor variable. - inplace(bool): Must use :attr:`False` if :attr:`x` is used in multiple + inplace(bool): Must use :attr:`False` if :attr:`x` is used in multiple operators. If this flag is set :attr:`True`, reuse input :attr:`x` to reshape, which will change the shape of tensor variable :attr:`x` and might cause errors when @@ -6527,7 +6527,9 @@ def image_resize(input, scale=None, name=None, resample='BILINEAR', - actual_shape=None): + actual_shape=None, + align_corners=True, + align_mode=0): """ **Resize a Batch of Images** @@ -6540,6 +6542,83 @@ def image_resize(input, 'NEAREST' : Nearest neighbor interpolation + Nearest neighbor interpolation is to perform nearest neighbor interpolation + in both the 3rd dimention(in height direction) and the 4th dimention(in width + direction) on input tensor. + + Bilinear interpolation is an extension of linear interpolation for + interpolating functions of two variables (e.g. H-direction and + W-direction in this op) on a rectilinear 2D grid. The key idea is + to perform linear interpolation first in one direction, and then + again in the other direction. + + Align_corners and align_mode are optinal parameters,The calculation method + of interpolation can be selected by them. + + Example: + + for scale: + + if align_corners = True && out_size > 1 : + + scale_factor = (in_size-1.0)/(out_size-1.0) + + else: + + scale_factor = float(in_size/out_size) + + + Nearest neighbor interpolation: + + case 1: + align_corners = False + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor + W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor + + case 2: + align_corners = True + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = round(H_{in} * scale_{factor}) + W_out = round(W_{in} * scale_{factor}) + + Bilinear interpolation: + + case 1: + align_corners = False , align_mode = 0 + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = (H_{in}+0.5) * scale_{factor} - 0.5 + W_out = (W_{in}+0.5) * scale_{factor} - 0.5 + + + case 2: + align_corners = False , align_mode = 1 + or + align_corners = True + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = H_{in} * scale_{factor} + W_out = W_{in} * scale_{factor} + + For details of nearest neighbor interpolation, please refer to Wikipedia: + https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation. + + For details of bilinear interpolation, please refer to Wikipedia: + https://en.wikipedia.org/wiki/Bilinear_interpolation. + + + Args: input (Variable): The input tensor of image resize layer, This is a 4-D tensor of the shape @@ -6569,6 +6648,12 @@ def image_resize(input, set, otherwise errors would be occured in graph constructing stage. Default: None + align_corners(bool) : An optional bool, If True, the centers of the 4 corner pixels of the + input and output tensors are aligned, preserving the values at the + corner pixels. + Default: True + align_mode(int) : An optional input to specify align_corners mode. can be \'0\' + for pytorch calculation method, can be \'1'\ for tensorflow calculation method. Returns: Variable: The output is a 4-D tensor of the shape @@ -6581,6 +6666,8 @@ def image_resize(input, or 'NEAREST' currently. ValueError: One of out_shape and scale must not be None. ValueError: out_shape length should be 2. + TypeError: align_corners shoule be a bool value + ValueError: align_mode can only be '0' or '1' Examples: .. code-block:: python @@ -6596,6 +6683,12 @@ def image_resize(input, "The 'resample' of image_resize can only be 'BILINEAR' or 'NEAREST' currently." ) resample_type = resample_methods[resample] + + if not isinstance(align_corners, bool): + raise TypeError("Attr align_corners should be a bool value") + if align_mode != 0 and align_mode != 1: + raise ValueError("align_mode can only be 0 or 1") + if out_shape is None and scale is None: raise ValueError("One of out_shape and scale must not be None.") helper = LayerHelper('{}_interp'.format(resample_type), **locals()) @@ -6635,9 +6728,13 @@ def image_resize(input, type='{}_interp'.format(resample_type), inputs=inputs, outputs={"Out": out}, - attrs={"out_h": out_h, - "out_w": out_w, - "interp_method": resample_type}) + attrs={ + "out_h": out_h, + "out_w": out_w, + "interp_method": resample_type, + "align_corners": align_corners, + "align_mode": align_mode + }) return out @@ -6646,7 +6743,9 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None, - actual_shape=None): + actual_shape=None, + align_corners=True, + align_mode=0): """ Resize input by performing bilinear interpolation based on given output shape which specified by actual_shape, out_shape and scale @@ -6661,6 +6760,50 @@ def resize_bilinear(input, For details of bilinear interpolation, please refer to Wikipedia: https://en.wikipedia.org/wiki/Bilinear_interpolation + Align_corners and align_mode are optinal parameters,The calculation + method of interpolation can be selected by them. + + + Align_corners and align_mode are optinal parameters,The calculation method + of interpolation can be selected by them. + + Example: + + for scale: + + if align_corners = True && out_size > 1 : + + scale_factor = (in_size-1.0)/(out_size-1.0) + + else: + + scale_factor = float(in_size/out_size) + + Bilinear interpolation: + + case 1: + align_corners = False , align_mode = 0 + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = (H_{in}+0.5) * scale_{factor} - 0.5 + W_out = (W_{in}+0.5) * scale_{factor} - 0.5 + + + case 2: + align_corners = False , align_mode = 1 + or + align_corners = True + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = H_{in} * scale_{factor} + W_out = W_{in} * scale_{factor} + + + Args: input(${x_type}): ${x_comment}. @@ -6684,6 +6827,8 @@ def resize_bilinear(input, set, otherwise errors would be occured in graph constructing stage. Default: None + align_corners(bool): ${align_corners_comment} + align_mode(bool): ${align_mode_comment} Returns: ${out_comment}. @@ -6694,7 +6839,8 @@ def resize_bilinear(input, out = fluid.layers.resize_bilinear(input, out_shape=[12, 12]) """ - return image_resize(input, out_shape, scale, name, 'BILINEAR', actual_shape) + return image_resize(input, out_shape, scale, name, 'BILINEAR', actual_shape, + align_corners, align_mode) @templatedoc(op_type="nearest_interp") @@ -6702,13 +6848,48 @@ def resize_nearest(input, out_shape=None, scale=None, name=None, - actual_shape=None): + actual_shape=None, + align_corners=True): """ Resize input by performing nearest neighbor interpolation in both the 3rd dimention(in height direction) and the 4th dimention(in width direction) based on given output shape which specified by actual_shape, out_shape and scale in priority order. + Example: + + for scale: + + if align_corners = True && out_size > 1 : + + scale_factor = (in_size-1.0)/(out_size-1.0) + + else: + + scale_factor = float(in_size/out_size) + + + Nearest neighbor interpolation: + + case 1: + align_corners = False + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor + W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor + + case 2: + align_corners = True + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = round(H_{in} * scale_{factor}) + W_out = round(W_{in} * scale_{factor}) + + For details of nearest neighbor interpolation, please refer to Wikipedia: https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation @@ -6735,6 +6916,7 @@ def resize_nearest(input, set, otherwise errors would be occured in graph constructing stage. Default: None + align_corners(bool): ${align_corners_comment} Returns: ${out_comment}. @@ -6745,7 +6927,8 @@ def resize_nearest(input, out = fluid.layers.resize_nearest(input, out_shape=[12, 12]) """ - return image_resize(input, out_shape, scale, name, 'NEAREST', actual_shape) + return image_resize(input, out_shape, scale, name, 'NEAREST', actual_shape, + align_corners) def image_resize_short(input, out_short_len, resample='BILINEAR'): diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py index c8a7063dc..4523fb54c 100644 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py @@ -20,7 +20,13 @@ from op_test import OpTest import paddle.fluid.core as core -def bilinear_interp_np(input, out_h, out_w, out_size=None, actual_shape=None): +def bilinear_interp_np(input, + out_h, + out_w, + out_size=None, + actual_shape=None, + align_corners=True, + align_mode=0): """bilinear interpolation implement in shape [N, C, H, W]""" if out_size is not None: out_h = out_size[0] @@ -29,25 +35,41 @@ def bilinear_interp_np(input, out_h, out_w, out_size=None, actual_shape=None): out_h = actual_shape[0] out_w = actual_shape[1] batch_size, channel, in_h, in_w = input.shape - if out_h > 1: + + ratio_h = ratio_w = 0.0 + if (align_corners and out_h > 1): ratio_h = (in_h - 1.0) / (out_h - 1.0) else: - ratio_h = 0.0 - if out_w > 1: + ratio_h = 1.0 * in_h / out_h + if (align_corners and out_w > 1): ratio_w = (in_w - 1.0) / (out_w - 1.0) else: - ratio_w = 0.0 + ratio_w = 1.0 * in_w / out_w out = np.zeros((batch_size, channel, out_h, out_w)) + for i in range(out_h): - h = int(ratio_h * i) + if (align_mode == 0 and not align_corners): + h = int(ratio_h * (i + 0.5) - 0.5) + else: + h = int(ratio_h * i) + hid = 1 if h < in_h - 1 else 0 - h1lambda = ratio_h * i - h + if (align_mode == 0 and not align_corners): + h1lambda = ratio_h * (i + 0.5) - 0.5 - h + else: + h1lambda = ratio_h * i - h h2lambda = 1.0 - h1lambda for j in range(out_w): - w = int(ratio_w * j) + if (align_mode == 0 and not align_corners): + w = int(ratio_w * (j + 0.5) - 0.5) + else: + w = int(ratio_w * j) wid = 1 if w < in_w - 1 else 0 - w1lambda = ratio_w * j - w + if (align_mode == 0 and not align_corners): + w1lambda = ratio_w * (j + 0.5) - 0.5 - w + else: + w1lambda = ratio_w * j - w w2lambda = 1.0 - w1lambda out[:, :, i, j] = h2lambda*(w2lambda*input[:, :, h, w] + @@ -66,7 +88,8 @@ class TestBilinearInterpOp(OpTest): input_np = np.random.random(self.input_shape).astype("float32") output_np = bilinear_interp_np(input_np, self.out_h, self.out_w, - self.out_size, self.actual_shape) + self.out_size, self.actual_shape, + self.align_corners, self.align_mode) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -75,7 +98,9 @@ class TestBilinearInterpOp(OpTest): self.attrs = { 'out_h': self.out_h, 'out_w': self.out_w, - 'interp_method': self.interp_method + 'interp_method': self.interp_method, + 'align_corners': self.align_corners, + 'align_mode': self.align_mode } self.outputs = {'Out': output_np} @@ -91,6 +116,8 @@ class TestBilinearInterpOp(OpTest): self.out_h = 2 self.out_w = 2 self.out_size = np.array([3, 3]).astype("int32") + self.align_corners = False + self.align_mode = 0 class TestBilinearInterpCase1(TestBilinearInterpOp): @@ -99,6 +126,8 @@ class TestBilinearInterpCase1(TestBilinearInterpOp): self.input_shape = [4, 1, 7, 8] self.out_h = 1 self.out_w = 1 + self.align_corners = False + self.align_mode = 0 class TestBilinearInterpCase2(TestBilinearInterpOp): @@ -107,6 +136,8 @@ class TestBilinearInterpCase2(TestBilinearInterpOp): self.input_shape = [3, 3, 9, 6] self.out_h = 12 self.out_w = 12 + self.align_corners = False + self.align_mode = 0 class TestBilinearInterpCase3(TestBilinearInterpOp): @@ -115,6 +146,8 @@ class TestBilinearInterpCase3(TestBilinearInterpOp): self.input_shape = [1, 1, 128, 64] self.out_h = 64 self.out_w = 128 + self.align_corners = False + self.align_mode = 0 class TestBilinearInterpCase4(TestBilinearInterpOp): @@ -124,6 +157,8 @@ class TestBilinearInterpCase4(TestBilinearInterpOp): self.out_h = 1 self.out_w = 1 self.out_size = np.array([2, 2]).astype("int32") + self.align_corners = False + self.align_mode = 0 class TestBilinearInterpCase5(TestBilinearInterpOp): @@ -133,6 +168,8 @@ class TestBilinearInterpCase5(TestBilinearInterpOp): self.out_h = 12 self.out_w = 12 self.out_size = np.array([11, 11]).astype("int32") + self.align_corners = False + self.align_mode = 0 class TestBilinearInterpCase6(TestBilinearInterpOp): @@ -142,6 +179,8 @@ class TestBilinearInterpCase6(TestBilinearInterpOp): self.out_h = 64 self.out_w = 128 self.out_size = np.array([65, 129]).astype("int32") + self.align_corners = False + self.align_mode = 0 class TestBilinearInterpActualShape(TestBilinearInterpOp): @@ -151,6 +190,8 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp): self.out_h = 64 self.out_w = 32 self.out_size = np.array([66, 40]).astype("int32") + self.align_corners = False + self.align_mode = 0 class TestBilinearInterpOpUint8(OpTest): @@ -162,14 +203,17 @@ class TestBilinearInterpOpUint8(OpTest): input_np = np.random.randint( low=0, high=256, size=self.input_shape).astype("uint8") output_np = bilinear_interp_np(input_np, self.out_h, self.out_w, - self.out_size, self.actual_shape) + self.out_size, self.actual_shape, + self.align_corners, self.align_mode) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size self.attrs = { 'out_h': self.out_h, 'out_w': self.out_w, - 'interp_method': self.interp_method + 'interp_method': self.interp_method, + 'align_corners': self.align_corners, + 'align_mode': self.align_mode } self.outputs = {'Out': output_np} @@ -181,6 +225,8 @@ class TestBilinearInterpOpUint8(OpTest): self.input_shape = [1, 3, 9, 6] self.out_h = 10 self.out_w = 9 + self.align_corners = False + self.align_mode = 0 class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8): @@ -189,6 +235,8 @@ class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8): self.input_shape = [2, 3, 128, 64] self.out_h = 120 self.out_w = 50 + self.align_corners = False + self.align_mode = 0 class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8): @@ -198,6 +246,26 @@ class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8): self.out_h = 5 self.out_w = 13 self.out_size = np.array([6, 15]).astype("int32") + self.align_corners = False + self.align_mode = 0 + + +class TestBilinearInterpOtherMethod1(TestBilinearInterpOp): + def set_align_mode(self): + self.align_mode = 1 + self.align_corners = False + + +class TestBilinearInterpWithMethod2(TestBilinearInterpOp): + def set_align_mode(self): + self.align_corners = True + self.align_mode = 1 + + +class TestBilinearInterpWithMethod3(TestBilinearInterpOp): + def set_align_mode(self): + self.align_corners = True + self.align_mode = 0 if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py index 242709425..22f7bac0b 100644 --- a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py @@ -24,7 +24,8 @@ def nearest_neighbor_interp_np(X, out_h, out_w, out_size=None, - actual_shape=None): + actual_shape=None, + align_corners=True): """nearest neighbor interpolation implement in shape [N, C, H, W]""" if out_size is not None: out_h = out_size[0] @@ -35,17 +36,29 @@ def nearest_neighbor_interp_np(X, n, c, in_h, in_w = X.shape ratio_h = ratio_w = 0.0 - if out_h > 1: + if (align_corners and out_h > 1): ratio_h = (in_h - 1.0) / (out_h - 1.0) - if out_w > 1: + else: + ratio_h = 1.0 * in_h / out_h + if (align_corners and out_w > 1): ratio_w = (in_w - 1.0) / (out_w - 1.0) + else: + ratio_w = 1.0 * in_w / out_w out = np.zeros((n, c, out_h, out_w)) - for i in range(out_h): - in_i = int(ratio_h * i + 0.5) - for j in range(out_w): - in_j = int(ratio_w * j + 0.5) - out[:, :, i, j] = X[:, :, in_i, in_j] + + if align_corners: + for i in range(out_h): + in_i = int(ratio_h * i + 0.5) + for j in range(out_w): + in_j = int(ratio_w * j + 0.5) + out[:, :, i, j] = X[:, :, in_i, in_j] + else: + for i in range(out_h): + in_i = int(ratio_h * i) + for j in range(out_w): + in_j = int(ratio_w * j) + out[:, :, i, j] = X[:, :, in_i, in_j] return out.astype(X.dtype) @@ -59,7 +72,8 @@ class TestNearestInterpOp(OpTest): input_np = np.random.random(self.input_shape).astype("float32") output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w, - self.out_size, self.actual_shape) + self.out_size, self.actual_shape, + self.align_corners) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -68,7 +82,8 @@ class TestNearestInterpOp(OpTest): self.attrs = { 'out_h': self.out_h, 'out_w': self.out_w, - 'interp_method': self.interp_method + 'interp_method': self.interp_method, + 'align_corners': self.align_corners, } self.outputs = {'Out': output_np} @@ -84,6 +99,7 @@ class TestNearestInterpOp(OpTest): self.out_h = 2 self.out_w = 2 self.out_size = np.array([3, 3]).astype("int32") + self.align_corners = True class TestNearestNeighborInterpCase1(TestNearestInterpOp): @@ -92,6 +108,7 @@ class TestNearestNeighborInterpCase1(TestNearestInterpOp): self.input_shape = [4, 1, 7, 8] self.out_h = 1 self.out_w = 1 + self.align_corners = False class TestNearestNeighborInterpCase2(TestNearestInterpOp): @@ -100,6 +117,7 @@ class TestNearestNeighborInterpCase2(TestNearestInterpOp): self.input_shape = [3, 3, 9, 6] self.out_h = 12 self.out_w = 12 + self.align_corners = True class TestNearestNeighborInterpCase3(TestNearestInterpOp): @@ -108,6 +126,7 @@ class TestNearestNeighborInterpCase3(TestNearestInterpOp): self.input_shape = [1, 1, 128, 64] self.out_h = 64 self.out_w = 128 + self.align_corners = True class TestNearestNeighborInterpCase4(TestNearestInterpOp): @@ -117,6 +136,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpOp): self.out_h = 1 self.out_w = 1 self.out_size = np.array([2, 2]).astype("int32") + self.align_corners = True class TestNearestNeighborInterpCase5(TestNearestInterpOp): @@ -126,6 +146,7 @@ class TestNearestNeighborInterpCase5(TestNearestInterpOp): self.out_h = 12 self.out_w = 12 self.out_size = np.array([11, 11]).astype("int32") + self.align_corners = True class TestNearestNeighborInterpCase6(TestNearestInterpOp): @@ -135,6 +156,7 @@ class TestNearestNeighborInterpCase6(TestNearestInterpOp): self.out_h = 64 self.out_w = 128 self.out_size = np.array([65, 129]).astype("int32") + self.align_corners = True class TestNearestNeighborInterpActualShape(TestNearestInterpOp): @@ -144,6 +166,7 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp): self.out_h = 64 self.out_w = 32 self.out_size = np.array([66, 40]).astype("int32") + self.align_corners = True class TestNearestInterpOpUint8(OpTest): @@ -155,14 +178,16 @@ class TestNearestInterpOpUint8(OpTest): input_np = np.random.randint( low=0, high=256, size=self.input_shape).astype("uint8") output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w, - self.out_size, self.actual_shape) + self.out_size, self.actual_shape, + self.align_corners) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size self.attrs = { 'out_h': self.out_h, 'out_w': self.out_w, - 'interp_method': self.interp_method + 'interp_method': self.interp_method, + 'align_corners': self.align_corners } self.outputs = {'Out': output_np} @@ -174,6 +199,7 @@ class TestNearestInterpOpUint8(OpTest): self.input_shape = [1, 3, 9, 6] self.out_h = 10 self.out_w = 9 + self.align_corners = True class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): @@ -182,6 +208,7 @@ class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): self.input_shape = [2, 3, 128, 64] self.out_h = 120 self.out_w = 50 + self.align_corners = False class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): @@ -191,6 +218,12 @@ class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): self.out_h = 5 self.out_w = 13 self.out_size = np.array([6, 15]).astype("int32") + self.align_corners = True + + +class TestNearestInterpWithoutCorners(TestNearestInterpOp): + def set_align_corners(self): + self.align_corners = False if __name__ == "__main__": -- GitLab