提交 48cc4846 编写于 作者: T tink2123

add align_corners and align_mode for image_resize

test=develop
上级 e07900d3
......@@ -82,6 +82,18 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
"bilinear interpolation and \"nearest\" for nearest "
"neighbor interpolation.")
.SetDefault("bilinear");
AddAttr<bool>(
"align_corners",
"an optional bool. Defaults to True. "
"If True, the centers of 4 corner pixels of the input and output "
"tensors are aligned, preserving the values at the corner pixels, "
"if False, are not aligned")
.SetDefault(true);
AddAttr<int>("align_mode",
"(int, default \'0\'), align_corners mode , can be \'0\' "
"for pytorch calculation method, can be \'1\' for "
"tensorflow calculation method.")
.SetDefault(0);
AddComment(R"DOC(
This operator samples input X to given output shape by using specified
interpolation method, the interpolation methods can be \"nearest\"
......@@ -98,6 +110,67 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
to perform linear interpolation first in one direction, and then
again in the other direction.
Align_corners and align_mode are optional parameters. The calculation method
of interpolation can be selected by them.
Example:
for scale:
if align_corners = True and out_{size}>1 :
scale_{factor} = (in_{size}-1.0)/(out_{size}-1.0)
else:
scale_{factor} = float(in_{size}/out_{size})
Nearest neighbor interpolation:
case 1:
align_corners = False
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = \left \lfloor {H_{in} * scale_{factor}} \right \rfloor
W_out = \left \lfloor {W_{in} * scale_{factor}} \right \rfloor
case 2:
align_corners = True
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = round(H_{in} * scale_{factor})
W_out = round(W_{in} * scale_{factor})
Bilinear interpolation:
case 1:
align_corners = False , align_mode = 0
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = (H_{in}+0.5) * scale_{factor} - 0.5
W_out = (W_{in}+0.5) * scale_{factor} - 0.5
case 2:
align_corners = False , align_mode = 1
or
align_corners = True
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor}
For details of nearest neighbor interpolation, please refer to Wikipedia:
https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation
......
......@@ -23,7 +23,8 @@ __global__ void KeNearestNeighborInterpFw(
const T* in, const size_t in_img_h, const size_t in_img_w,
const size_t input_h, const size_t input_w, T* out, const size_t out_img_h,
const size_t out_img_w, const size_t output_h, const size_t output_w,
const size_t num_channels, const float ratio_h, const float ratio_w) {
const size_t num_channels, const float ratio_h, const float ratio_w,
const bool align_corners) {
int nthreads = output_h * output_w;
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
......@@ -35,10 +36,14 @@ __global__ void KeNearestNeighborInterpFw(
int channel_id = out_id_w / out_img_size;
int out_img_idy = (out_id_w % out_img_size) / out_img_w;
int in_img_idy = static_cast<int>(ratio_h * out_img_idy + 0.5);
int in_img_idy = (align_corners)
? static_cast<int>(ratio_h * out_img_idy + 0.5)
: static_cast<int>(ratio_h * out_img_idy);
int out_img_idx = tid % out_img_w;
int in_img_idx = static_cast<int>(ratio_w * out_img_idx + 0.5);
int in_img_idx = (align_corners)
? static_cast<int>(ratio_w * out_img_idx + 0.5)
: static_cast<int>(ratio_w * out_img_idx);
out[tid] = in[out_id_h * input_w + channel_id * in_img_size +
in_img_idy * in_img_w + in_img_idx];
......@@ -50,7 +55,8 @@ __global__ void KeNearestNeighborInterpBw(
T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h,
const size_t input_w, const T* out, const size_t out_img_h,
const size_t out_img_w, const size_t output_h, const size_t output_w,
const size_t num_channels, const float ratio_h, const float ratio_w) {
const size_t num_channels, const float ratio_h, const float ratio_w,
const bool align_corners) {
int nthreads = output_h * output_w;
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
......@@ -62,10 +68,14 @@ __global__ void KeNearestNeighborInterpBw(
int channel_id = out_id_w / out_img_size;
int out_img_idy = (out_id_w % out_img_size) / out_img_w;
int in_img_idy = static_cast<int>(ratio_h * out_img_idy + 0.5);
int in_img_idy = (align_corners)
? static_cast<int>(ratio_h * out_img_idy + 0.5)
: static_cast<int>(ratio_h * out_img_idy);
int out_img_idx = tid % out_img_w;
int in_img_idx = static_cast<int>(ratio_w * out_img_idx + 0.5);
int in_img_idx = (align_corners)
? static_cast<int>(ratio_w * out_img_idx + 0.5)
: static_cast<int>(ratio_w * out_img_idx);
T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size +
in_img_idy * in_img_w + in_img_idx];
......@@ -79,7 +89,8 @@ __global__ void KeBilinearInterpFw(
const T* in, const size_t in_img_h, const size_t in_img_w,
const size_t input_h, const size_t input_w, T* out, const size_t out_img_h,
const size_t out_img_w, const size_t output_h, const size_t output_w,
const size_t num_channels, const float ratio_h, const float ratio_w) {
const size_t num_channels, const float ratio_h, const float ratio_w,
const bool align_corners, const int align_mode) {
int nthreads = output_h * output_w;
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
......@@ -91,15 +102,23 @@ __global__ void KeBilinearInterpFw(
int channel_id = out_id_w / out_img_size;
int out_img_idy = (out_id_w % out_img_size) / out_img_w;
int in_img_idy = ratio_h * out_img_idy;
int in_img_idy = (align_mode == 0 && !align_corners)
? static_cast<int>(ratio_h * (out_img_idy + 0.5) - 0.5)
: static_cast<int>(ratio_h * out_img_idy);
int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0;
T h1lambda = ratio_h * out_img_idy - in_img_idy;
T h1lambda = (align_mode == 0 && !align_corners)
? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy
: ratio_h * out_img_idy - in_img_idy;
T h2lambda = 1.f - h1lambda;
int out_img_idx = tid % out_img_w;
int in_img_idx = ratio_w * out_img_idx;
int in_img_idx = (align_mode == 0 && !align_corners)
? static_cast<int>(ratio_w * (out_img_idx + 0.5) - 0.5)
: static_cast<int>(ratio_w * out_img_idx);
int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0;
T w1lambda = ratio_w * out_img_idx - in_img_idx;
T w1lambda = (align_mode == 0 && !align_corners)
? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx
: ratio_w * out_img_idx - in_img_idx;
T w2lambda = 1.f - w1lambda;
const T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size +
......@@ -118,7 +137,8 @@ __global__ void KeBilinearInterpBw(
T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h,
const size_t input_w, const T* out, const size_t out_img_h,
const size_t out_img_w, const size_t output_h, const size_t output_w,
const size_t num_channels, const T ratio_h, const T ratio_w) {
const size_t num_channels, const T ratio_h, const T ratio_w,
const bool align_corners, const int align_mode) {
int nthreads = output_h * output_w;
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
......@@ -130,15 +150,24 @@ __global__ void KeBilinearInterpBw(
int channel_id = out_id_w / out_img_size;
int out_img_idy = (out_id_w % out_img_size) / out_img_w;
int in_img_idy = ratio_h * out_img_idy;
int in_img_idy = (align_mode == 0 && !align_corners)
? ratio_h * (out_img_idy + 0.5) - 0.5
: ratio_h * out_img_idy;
int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0;
T h1lambda = ratio_h * out_img_idy - in_img_idy;
T h1lambda = (align_mode == 0 && !align_corners)
? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy
: ratio_h * out_img_idy - in_img_idy;
T h2lambda = 1.f - h1lambda;
int out_img_idx = tid % out_img_w;
int in_img_idx = ratio_w * out_img_idx;
int in_img_idx = (align_mode == 0 && !align_corners)
? ratio_w * (out_img_idx + 0.5) - 0.5
: ratio_w * out_img_idx;
int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0;
T w1lambda = ratio_w * out_img_idx - in_img_idx;
T w1lambda = (align_mode == 0 && !align_corners)
? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx
: ratio_w * out_img_idx - in_img_idx;
T w2lambda = 1.f - w1lambda;
T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size +
......@@ -175,6 +204,9 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
out_w = size_data[1];
}
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
int n = input->dims()[0];
int c = input->dims()[1];
int in_h = input->dims()[2];
......@@ -188,10 +220,12 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
int in_chw = c * in_hw;
int out_chw = c * out_hw;
float ratio_h =
(out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
float ratio_w =
(out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
float ratio_h = (align_corners && out_h > 1)
? static_cast<float>(in_h - 1) / (out_h - 1)
: static_cast<float>(in_h) / out_h;
float ratio_w = (align_corners && out_w > 1)
? static_cast<float>(in_w - 1) / (out_w - 1)
: static_cast<float>(in_w) / out_w;
if (in_h == out_h && in_w == out_w) {
framework::TensorCopy(*input, ctx.GetPlace(), output);
......@@ -206,12 +240,12 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
KeNearestNeighborInterpFw<
T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n,
out_chw, c, ratio_h, ratio_w);
out_chw, c, ratio_h, ratio_w, align_corners);
} else if ("bilinear" == interp_method) {
KeBilinearInterpFw<
T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n,
out_chw, c, ratio_h, ratio_w);
out_chw, c, ratio_h, ratio_w, align_corners, align_mode);
}
}
};
......@@ -234,6 +268,10 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
int out_h = ctx.Attr<int>("out_h");
int out_w = ctx.Attr<int>("out_w");
auto out_size = ctx.Input<Tensor>("OutSize");
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
if (out_size != nullptr) {
Tensor sizes;
framework::TensorCopy(*out_size, platform::CPUPlace(), &sizes);
......@@ -252,10 +290,12 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
int in_chw = c * in_hw;
int out_chw = c * out_hw;
float ratio_h =
(out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
float ratio_w =
(out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
float ratio_h = (align_corners && out_h > 1)
? static_cast<float>(in_h - 1) / (out_h - 1)
: static_cast<float>(in_h) / out_h;
float ratio_w = (align_corners && out_w > 1)
? static_cast<float>(in_w - 1) / (out_w - 1)
: static_cast<float>(in_w) / out_w;
if (in_h == out_h && in_w == out_w) {
framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad);
......@@ -270,12 +310,12 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
KeNearestNeighborInterpBw<
T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h,
out_w, n, out_chw, c, ratio_h, ratio_w);
out_w, n, out_chw, c, ratio_h, ratio_w, align_corners);
} else if ("bilinear" == interp_method) {
KeBilinearInterpBw<
T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h,
out_w, n, out_chw, c, ratio_h, ratio_w);
out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, align_mode);
}
}
};
......
......@@ -26,14 +26,17 @@ template <typename T>
static void NearestNeighborInterpolate(const Tensor& input, Tensor* output,
const float ratio_h, const float ratio_w,
const int n, const int c,
const int out_h, const int out_w) {
const int out_h, const int out_w,
const bool align_corners) {
auto input_t = EigenTensor<T, 4>::From(input);
auto output_t = EigenTensor<T, 4>::From(*output);
for (int k = 0; k < out_h; k++) { // loop for images
int in_k = static_cast<int>(ratio_h * k + 0.5);
int in_k = (align_corners) ? static_cast<int>(ratio_h * k + 0.5)
: static_cast<int>(ratio_h * k);
for (int l = 0; l < out_w; l++) {
int in_l = static_cast<int>(ratio_w * l + 0.5);
int in_l = (align_corners) ? static_cast<int>(ratio_w * l + 0.5)
: static_cast<int>(ratio_w * l);
for (int i = 0; i < n; i++) { // loop for batches
for (int j = 0; j < c; j++) { // loop for channels
......@@ -48,20 +51,29 @@ template <typename T>
static void BilinearInterpolation(const Tensor& input, Tensor* output,
const float ratio_h, const float ratio_w,
const int in_h, const int in_w, const int n,
const int c, const int out_h,
const int out_w) {
const int c, const int out_h, const int out_w,
const bool align_corners,
const bool align_mode) {
auto input_t = EigenTensor<T, 4>::From(input);
auto output_t = EigenTensor<T, 4>::From(*output);
for (int k = 0; k < out_h; k++) { // loop for images
int y_n = static_cast<int>(ratio_h * k);
int y_n = (align_mode == 0 && !align_corners)
? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
: static_cast<int>(ratio_h * k);
int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1);
float d_n = ratio_h * k - y_n;
float d_n = (align_mode == 0 && !align_corners)
? ratio_h * (k + 0.5) - 0.5 - y_n
: ratio_h * k - y_n;
float d_s = 1.f - d_n;
for (int l = 0; l < out_w; l++) {
int x_w = static_cast<int>(ratio_w * l);
int x_w = (align_mode == 0 && !align_corners)
? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
: static_cast<int>(ratio_w * l);
int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1);
float d_w = ratio_w * l - x_w;
float d_w = (align_mode == 0 && !align_corners)
? ratio_w * (l + 0.5) - 0.5 - x_w
: ratio_w * l - x_w;
float d_e = 1.f - d_w;
for (int i = 0; i < n; i++) { // loop for batches
......@@ -78,19 +90,20 @@ static void BilinearInterpolation(const Tensor& input, Tensor* output,
}
template <typename T>
static void NearestNeighborInterpolateGrad(const Tensor& output_grad,
Tensor* input_grad,
const float ratio_h,
const float ratio_w, const int n,
const int c, const int out_h,
const int out_w) {
static void NearestNeighborInterpolateGrad(
const Tensor& output_grad, Tensor* input_grad, const float ratio_h,
const float ratio_w, const int n, const int c, const int out_h,
const int out_w, const bool align_corners) {
auto input_grad_t = EigenTensor<T, 4>::From(*input_grad);
auto output_grad_t = EigenTensor<T, 4>::From(output_grad);
for (int k = 0; k < out_h; k++) { // loop for images
int in_k = static_cast<int>(ratio_h * k + 0.5);
int in_k = (align_corners) ? static_cast<int>(ratio_h * k + 0.5)
: static_cast<int>(ratio_h * k);
for (int l = 0; l < out_w; l++) {
int in_l = static_cast<int>(ratio_w * l + 0.5);
int in_l = (align_corners) ? static_cast<int>(ratio_w * l + 0.5)
: static_cast<int>(ratio_w * l);
for (int i = 0; i < n; i++) { // loop for batches
for (int j = 0; j < c; j++) { // loop for channels
......@@ -106,19 +119,29 @@ static void BilinearInterpolationGrad(const Tensor& output_grad,
Tensor* input_grad, const float ratio_h,
const float ratio_w, const int in_h,
const int in_w, const int n, const int c,
const int out_h, const int out_w) {
const int out_h, const int out_w,
const bool align_corners,
const int align_mode) {
auto input_grad_t = EigenTensor<T, 4>::From(*input_grad);
auto output_grad_t = EigenTensor<T, 4>::From(output_grad);
for (int k = 0; k < out_h; k++) { // loop for images
int y_n = static_cast<int>(ratio_h * k);
int y_n = (align_mode == 0 && !align_corners)
? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
: static_cast<int>(ratio_h * k);
int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1);
float d_n = ratio_h * k - y_n;
float d_n = (align_mode == 0 && !align_corners)
? ratio_h * (k + 0.5) - 0.5 - y_n
: ratio_h * k - y_n;
float d_s = 1.f - d_n;
for (int l = 0; l < out_w; l++) {
int x_w = static_cast<int>(ratio_w * l);
int x_w = (align_mode == 0 && !align_corners)
? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
: static_cast<int>(ratio_w * l);
int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1);
float d_w = ratio_w * l - x_w;
float d_w = (align_mode == 0 && !align_corners)
? ratio_w * (l + 0.5) - 0.5 - x_w
: ratio_w * l - x_w;
float d_e = 1.f - d_w;
for (int i = 0; i < n; i++) { // loop for batches
......@@ -134,7 +157,6 @@ static void BilinearInterpolationGrad(const Tensor& output_grad,
}
}
}
template <typename T>
class InterpolateKernel : public framework::OpKernel<T> {
public:
......@@ -151,6 +173,8 @@ class InterpolateKernel : public framework::OpKernel<T> {
out_h = out_size_data[0];
out_w = out_size_data[1];
}
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
const int n = input->dims()[0];
const int c = input->dims()[1];
......@@ -168,17 +192,19 @@ class InterpolateKernel : public framework::OpKernel<T> {
return;
}
float ratio_h =
(out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
float ratio_w =
(out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
float ratio_h = (align_corners && out_h > 1)
? static_cast<float>(in_h - 1) / (out_h - 1)
: static_cast<float>(in_h) / out_h;
float ratio_w = (align_corners && out_w > 1)
? static_cast<float>(in_w - 1) / (out_w - 1)
: static_cast<float>(in_w) / out_w;
if ("bilinear" == interp_method) {
BilinearInterpolation<T>(*input, output, ratio_h, ratio_w, in_h, in_w, n,
c, out_h, out_w);
c, out_h, out_w, align_corners, align_mode);
} else if ("nearest" == interp_method) {
NearestNeighborInterpolate<T>(*input, output, ratio_h, ratio_w, n, c,
out_h, out_w);
out_h, out_w, align_corners);
}
}
};
......@@ -200,6 +226,8 @@ class InterpolateGradKernel : public framework::OpKernel<T> {
out_h = out_size_data[0];
out_w = out_size_data[1];
}
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
const int n = input->dims()[0];
const int c = input->dims()[1];
......@@ -217,17 +245,21 @@ class InterpolateGradKernel : public framework::OpKernel<T> {
return;
}
float ratio_h =
(out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
float ratio_w =
(out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
float ratio_h = (align_corners && out_h > 1)
? static_cast<float>(in_h - 1) / (out_h - 1)
: static_cast<float>(in_h) / out_h;
float ratio_w = (align_corners && out_w > 1)
? static_cast<float>(in_w - 1) / (out_w - 1)
: static_cast<float>(in_w) / out_w;
if ("bilinear" == interp_method) {
BilinearInterpolationGrad<T>(*output_grad, input_grad, ratio_h, ratio_w,
in_h, in_w, n, c, out_h, out_w);
in_h, in_w, n, c, out_h, out_w,
align_corners, align_mode);
} else if ("nearest" == interp_method) {
NearestNeighborInterpolateGrad<T>(*output_grad, input_grad, ratio_h,
ratio_w, n, c, out_h, out_w);
ratio_w, n, c, out_h, out_w,
align_corners);
}
}
};
......
......@@ -913,7 +913,7 @@ def dynamic_gru(input,
create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias
of GRU. Note that the bias with :math:`(1 \\times 3D)` concatenates
of GRU. Note that the bias with :math:`(1 \\times 3D)` concatenates
the bias in the update gate, reset gate and candidate calculations.
If it is set to False, no bias will be applied to the update gate,
reset gate and candidate calculations. If it is set to None or one
......@@ -1034,7 +1034,7 @@ def gru_unit(input,
create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias
of GRU. Note that the bias with :math:`(1 \\times 3D)` concatenates
of GRU. Note that the bias with :math:`(1 \\times 3D)` concatenates
the bias in the update gate, reset gate and candidate calculations.
If it is set to False, no bias will be applied to the update gate,
reset gate and candidate calculations. If it is set to None or one
......@@ -5350,7 +5350,7 @@ def transpose(x, perm, name=None):
Examples:
.. code-block:: python
# use append_batch_size=False to avoid prepending extra
# use append_batch_size=False to avoid prepending extra
# batch size in shape
x = fluid.layers.data(name='x', shape=[5, 10, 15],
dtype='float32', append_batch_size=False)
......@@ -5866,7 +5866,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
than :attr:`shape`.
act (str): The non-linear activation to be applied to the reshaped tensor
variable.
inplace(bool): Must use :attr:`False` if :attr:`x` is used in multiple
inplace(bool): Must use :attr:`False` if :attr:`x` is used in multiple
operators. If this flag is set :attr:`True`, reuse input
:attr:`x` to reshape, which will change the shape of
tensor variable :attr:`x` and might cause errors when
......@@ -6527,7 +6527,9 @@ def image_resize(input,
scale=None,
name=None,
resample='BILINEAR',
actual_shape=None):
actual_shape=None,
align_corners=True,
align_mode=0):
"""
**Resize a Batch of Images**
......@@ -6540,6 +6542,83 @@ def image_resize(input,
'NEAREST' : Nearest neighbor interpolation
Nearest neighbor interpolation is to perform nearest neighbor interpolation
in both the 3rd dimension(in height direction) and the 4th dimension(in width
direction) on input tensor.
Bilinear interpolation is an extension of linear interpolation for
interpolating functions of two variables (e.g. H-direction and
W-direction in this op) on a rectilinear 2D grid. The key idea is
to perform linear interpolation first in one direction, and then
again in the other direction.
Align_corners and align_mode are optional parameters. The calculation method
of interpolation can be selected by them.
Example:
for scale:
if align_corners = True && out_size > 1 :
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
scale_factor = float(in_size/out_size)
Nearest neighbor interpolation:
case 1:
align_corners = False
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = \\left \\lfloor {H_{in} * scale_{factor}} \\right \\rfloor
W_out = \\left \\lfloor {W_{in} * scale_{factor}} \\right \\rfloor
case 2:
align_corners = True
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = round(H_{in} * scale_{factor})
W_out = round(W_{in} * scale_{factor})
Bilinear interpolation:
case 1:
align_corners = False , align_mode = 0
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = (H_{in}+0.5) * scale_{factor} - 0.5
W_out = (W_{in}+0.5) * scale_{factor} - 0.5
case 2:
align_corners = False , align_mode = 1
or
align_corners = True
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor}
For details of nearest neighbor interpolation, please refer to Wikipedia:
https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation.
For details of bilinear interpolation, please refer to Wikipedia:
https://en.wikipedia.org/wiki/Bilinear_interpolation.
Args:
input (Variable): The input tensor of image resize layer,
This is a 4-D tensor of the shape
......@@ -6569,6 +6648,12 @@ def image_resize(input,
set, otherwise errors would occur in graph
constructing stage.
Default: None
align_corners(bool) : An optional bool, If True, the centers of the 4 corner pixels of the
input and output tensors are aligned, preserving the values at the
corner pixels.
Default: True
align_mode(int) : An optional int that selects the alignment calculation mode. Can be \'0\'
for pytorch calculation method, can be \'1\' for tensorflow calculation method.
Default: 0
Returns:
Variable: The output is a 4-D tensor of the shape
......@@ -6581,6 +6666,8 @@ def image_resize(input,
or 'NEAREST' currently.
ValueError: One of out_shape and scale must not be None.
ValueError: out_shape length should be 2.
TypeError: align_corners should be a bool value
ValueError: align_mode can only be '0' or '1'
Examples:
.. code-block:: python
......@@ -6596,6 +6683,12 @@ def image_resize(input,
"The 'resample' of image_resize can only be 'BILINEAR' or 'NEAREST' currently."
)
resample_type = resample_methods[resample]
if not isinstance(align_corners, bool):
raise TypeError("Attr align_corners should be a bool value")
if align_mode != 0 and align_mode != 1:
raise ValueError("align_mode can only be 0 or 1")
if out_shape is None and scale is None:
raise ValueError("One of out_shape and scale must not be None.")
helper = LayerHelper('{}_interp'.format(resample_type), **locals())
......@@ -6635,9 +6728,13 @@ def image_resize(input,
type='{}_interp'.format(resample_type),
inputs=inputs,
outputs={"Out": out},
attrs={"out_h": out_h,
"out_w": out_w,
"interp_method": resample_type})
attrs={
"out_h": out_h,
"out_w": out_w,
"interp_method": resample_type,
"align_corners": align_corners,
"align_mode": align_mode
})
return out
......@@ -6646,7 +6743,9 @@ def resize_bilinear(input,
out_shape=None,
scale=None,
name=None,
actual_shape=None):
actual_shape=None,
align_corners=True,
align_mode=0):
"""
Resize input by performing bilinear interpolation based on given
output shape which specified by actual_shape, out_shape and scale
......@@ -6661,6 +6760,50 @@ def resize_bilinear(input,
For details of bilinear interpolation, please refer to Wikipedia:
https://en.wikipedia.org/wiki/Bilinear_interpolation
Align_corners and align_mode are optional parameters. The calculation
method of interpolation can be selected by them.
Example:
for scale:
if align_corners = True && out_size > 1 :
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
scale_factor = float(in_size/out_size)
Bilinear interpolation:
case 1:
align_corners = False , align_mode = 0
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = (H_{in}+0.5) * scale_{factor} - 0.5
W_out = (W_{in}+0.5) * scale_{factor} - 0.5
case 2:
align_corners = False , align_mode = 1
or
align_corners = True
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor}
Args:
input(${x_type}): ${x_comment}.
......@@ -6684,6 +6827,8 @@ def resize_bilinear(input,
set, otherwise errors would occur in graph
constructing stage.
Default: None
align_corners(bool): ${align_corners_comment}
align_mode(int): ${align_mode_comment}
Returns:
${out_comment}.
......@@ -6694,7 +6839,8 @@ def resize_bilinear(input,
out = fluid.layers.resize_bilinear(input, out_shape=[12, 12])
"""
return image_resize(input, out_shape, scale, name, 'BILINEAR', actual_shape)
return image_resize(input, out_shape, scale, name, 'BILINEAR', actual_shape,
align_corners, align_mode)
@templatedoc(op_type="nearest_interp")
......@@ -6702,13 +6848,48 @@ def resize_nearest(input,
out_shape=None,
scale=None,
name=None,
actual_shape=None):
actual_shape=None,
align_corners=True):
"""
Resize input by performing nearest neighbor interpolation in both the
3rd dimension(in height direction) and the 4th dimension(in width
direction) based on given output shape which specified by actual_shape,
out_shape and scale in priority order.
Example:
for scale:
if align_corners = True && out_size > 1 :
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
scale_factor = float(in_size/out_size)
Nearest neighbor interpolation:
case 1:
align_corners = False
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = \\left \\lfloor {H_{in} * scale_{factor}} \\right \\rfloor
W_out = \\left \\lfloor {W_{in} * scale_{factor}} \\right \\rfloor
case 2:
align_corners = True
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = round(H_{in} * scale_{factor})
W_out = round(W_{in} * scale_{factor})
For details of nearest neighbor interpolation, please refer to Wikipedia:
https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation
......@@ -6735,6 +6916,7 @@ def resize_nearest(input,
set, otherwise errors would occur in graph
constructing stage.
Default: None
align_corners(bool): ${align_corners_comment}
Returns:
${out_comment}.
......@@ -6745,7 +6927,8 @@ def resize_nearest(input,
out = fluid.layers.resize_nearest(input, out_shape=[12, 12])
"""
return image_resize(input, out_shape, scale, name, 'NEAREST', actual_shape)
return image_resize(input, out_shape, scale, name, 'NEAREST', actual_shape,
align_corners)
def image_resize_short(input, out_short_len, resample='BILINEAR'):
......
......@@ -20,7 +20,13 @@ from op_test import OpTest
import paddle.fluid.core as core
def bilinear_interp_np(input, out_h, out_w, out_size=None, actual_shape=None):
def bilinear_interp_np(input,
out_h,
out_w,
out_size=None,
actual_shape=None,
align_corners=True,
align_mode=0):
"""bilinear interpolation implement in shape [N, C, H, W]"""
if out_size is not None:
out_h = out_size[0]
......@@ -29,25 +35,41 @@ def bilinear_interp_np(input, out_h, out_w, out_size=None, actual_shape=None):
out_h = actual_shape[0]
out_w = actual_shape[1]
batch_size, channel, in_h, in_w = input.shape
if out_h > 1:
ratio_h = ratio_w = 0.0
if (align_corners and out_h > 1):
ratio_h = (in_h - 1.0) / (out_h - 1.0)
else:
ratio_h = 0.0
if out_w > 1:
ratio_h = 1.0 * in_h / out_h
if (align_corners and out_w > 1):
ratio_w = (in_w - 1.0) / (out_w - 1.0)
else:
ratio_w = 0.0
ratio_w = 1.0 * in_w / out_w
out = np.zeros((batch_size, channel, out_h, out_w))
for i in range(out_h):
h = int(ratio_h * i)
if (align_mode == 0 and not align_corners):
h = int(ratio_h * (i + 0.5) - 0.5)
else:
h = int(ratio_h * i)
hid = 1 if h < in_h - 1 else 0
h1lambda = ratio_h * i - h
if (align_mode == 0 and not align_corners):
h1lambda = ratio_h * (i + 0.5) - 0.5 - h
else:
h1lambda = ratio_h * i - h
h2lambda = 1.0 - h1lambda
for j in range(out_w):
w = int(ratio_w * j)
if (align_mode == 0 and not align_corners):
w = int(ratio_w * (j + 0.5) - 0.5)
else:
w = int(ratio_w * j)
wid = 1 if w < in_w - 1 else 0
w1lambda = ratio_w * j - w
if (align_mode == 0 and not align_corners):
w1lambda = ratio_w * (j + 0.5) - 0.5 - w
else:
w1lambda = ratio_w * j - w
w2lambda = 1.0 - w1lambda
out[:, :, i, j] = h2lambda*(w2lambda*input[:, :, h, w] +
......@@ -66,7 +88,8 @@ class TestBilinearInterpOp(OpTest):
input_np = np.random.random(self.input_shape).astype("float32")
output_np = bilinear_interp_np(input_np, self.out_h, self.out_w,
self.out_size, self.actual_shape)
self.out_size, self.actual_shape,
self.align_corners, self.align_mode)
self.inputs = {'X': input_np}
if self.out_size is not None:
self.inputs['OutSize'] = self.out_size
......@@ -75,7 +98,9 @@ class TestBilinearInterpOp(OpTest):
self.attrs = {
'out_h': self.out_h,
'out_w': self.out_w,
'interp_method': self.interp_method
'interp_method': self.interp_method,
'align_corners': self.align_corners,
'align_mode': self.align_mode
}
self.outputs = {'Out': output_np}
......@@ -91,6 +116,8 @@ class TestBilinearInterpOp(OpTest):
self.out_h = 2
self.out_w = 2
self.out_size = np.array([3, 3]).astype("int32")
self.align_corners = False
self.align_mode = 0
class TestBilinearInterpCase1(TestBilinearInterpOp):
......@@ -99,6 +126,8 @@ class TestBilinearInterpCase1(TestBilinearInterpOp):
self.input_shape = [4, 1, 7, 8]
self.out_h = 1
self.out_w = 1
self.align_corners = False
self.align_mode = 0
class TestBilinearInterpCase2(TestBilinearInterpOp):
......@@ -107,6 +136,8 @@ class TestBilinearInterpCase2(TestBilinearInterpOp):
self.input_shape = [3, 3, 9, 6]
self.out_h = 12
self.out_w = 12
self.align_corners = False
self.align_mode = 0
class TestBilinearInterpCase3(TestBilinearInterpOp):
......@@ -115,6 +146,8 @@ class TestBilinearInterpCase3(TestBilinearInterpOp):
self.input_shape = [1, 1, 128, 64]
self.out_h = 64
self.out_w = 128
self.align_corners = False
self.align_mode = 0
class TestBilinearInterpCase4(TestBilinearInterpOp):
......@@ -124,6 +157,8 @@ class TestBilinearInterpCase4(TestBilinearInterpOp):
self.out_h = 1
self.out_w = 1
self.out_size = np.array([2, 2]).astype("int32")
self.align_corners = False
self.align_mode = 0
class TestBilinearInterpCase5(TestBilinearInterpOp):
......@@ -133,6 +168,8 @@ class TestBilinearInterpCase5(TestBilinearInterpOp):
self.out_h = 12
self.out_w = 12
self.out_size = np.array([11, 11]).astype("int32")
self.align_corners = False
self.align_mode = 0
class TestBilinearInterpCase6(TestBilinearInterpOp):
......@@ -142,6 +179,8 @@ class TestBilinearInterpCase6(TestBilinearInterpOp):
self.out_h = 64
self.out_w = 128
self.out_size = np.array([65, 129]).astype("int32")
self.align_corners = False
self.align_mode = 0
class TestBilinearInterpActualShape(TestBilinearInterpOp):
......@@ -151,6 +190,8 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp):
self.out_h = 64
self.out_w = 32
self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = False
self.align_mode = 0
class TestBilinearInterpOpUint8(OpTest):
......@@ -162,14 +203,17 @@ class TestBilinearInterpOpUint8(OpTest):
input_np = np.random.randint(
low=0, high=256, size=self.input_shape).astype("uint8")
output_np = bilinear_interp_np(input_np, self.out_h, self.out_w,
self.out_size, self.actual_shape)
self.out_size, self.actual_shape,
self.align_corners, self.align_mode)
self.inputs = {'X': input_np}
if self.out_size is not None:
self.inputs['OutSize'] = self.out_size
self.attrs = {
'out_h': self.out_h,
'out_w': self.out_w,
'interp_method': self.interp_method
'interp_method': self.interp_method,
'align_corners': self.align_corners,
'align_mode': self.align_mode
}
self.outputs = {'Out': output_np}
......@@ -181,6 +225,8 @@ class TestBilinearInterpOpUint8(OpTest):
self.input_shape = [1, 3, 9, 6]
self.out_h = 10
self.out_w = 9
self.align_corners = False
self.align_mode = 0
class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8):
......@@ -189,6 +235,8 @@ class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8):
self.input_shape = [2, 3, 128, 64]
self.out_h = 120
self.out_w = 50
self.align_corners = False
self.align_mode = 0
class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8):
......@@ -198,6 +246,26 @@ class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8):
self.out_h = 5
self.out_w = 13
self.out_size = np.array([6, 15]).astype("int32")
self.align_corners = False
self.align_mode = 0
class TestBilinearInterpOtherMethod1(TestBilinearInterpOp):
    """Bilinear-interp test variant: align_mode = 1 with align_corners = False."""

    # NOTE(review): no call to set_align_mode() is visible in this chunk --
    # confirm that the base test class actually invokes this hook; if it does
    # not, the values below are never applied and the inherited configuration
    # is what gets tested.
    def set_align_mode(self):
        """Store the alignment attributes this variant is meant to exercise."""
        self.align_mode, self.align_corners = 1, False
class TestBilinearInterpWithMethod2(TestBilinearInterpOp):
    """Bilinear-interp test variant: align_corners = True with align_mode = 1."""

    # NOTE(review): no call to set_align_mode() is visible in this chunk --
    # confirm that the base test class actually invokes this hook; if it does
    # not, the values below are never applied and the inherited configuration
    # is what gets tested.
    def set_align_mode(self):
        """Store the alignment attributes this variant is meant to exercise."""
        self.align_corners, self.align_mode = True, 1
class TestBilinearInterpWithMethod3(TestBilinearInterpOp):
    """Bilinear-interp test variant: align_corners = True with align_mode = 0."""

    # NOTE(review): no call to set_align_mode() is visible in this chunk --
    # confirm that the base test class actually invokes this hook; if it does
    # not, the values below are never applied and the inherited configuration
    # is what gets tested.
    def set_align_mode(self):
        """Store the alignment attributes this variant is meant to exercise."""
        self.align_corners, self.align_mode = True, 0
if __name__ == "__main__":
......
......@@ -24,7 +24,8 @@ def nearest_neighbor_interp_np(X,
out_h,
out_w,
out_size=None,
actual_shape=None):
actual_shape=None,
align_corners=True):
"""nearest neighbor interpolation implement in shape [N, C, H, W]"""
if out_size is not None:
out_h = out_size[0]
......@@ -35,17 +36,29 @@ def nearest_neighbor_interp_np(X,
n, c, in_h, in_w = X.shape
ratio_h = ratio_w = 0.0
if out_h > 1:
if (align_corners and out_h > 1):
ratio_h = (in_h - 1.0) / (out_h - 1.0)
if out_w > 1:
else:
ratio_h = 1.0 * in_h / out_h
if (align_corners and out_w > 1):
ratio_w = (in_w - 1.0) / (out_w - 1.0)
else:
ratio_w = 1.0 * in_w / out_w
out = np.zeros((n, c, out_h, out_w))
for i in range(out_h):
in_i = int(ratio_h * i + 0.5)
for j in range(out_w):
in_j = int(ratio_w * j + 0.5)
out[:, :, i, j] = X[:, :, in_i, in_j]
if align_corners:
for i in range(out_h):
in_i = int(ratio_h * i + 0.5)
for j in range(out_w):
in_j = int(ratio_w * j + 0.5)
out[:, :, i, j] = X[:, :, in_i, in_j]
else:
for i in range(out_h):
in_i = int(ratio_h * i)
for j in range(out_w):
in_j = int(ratio_w * j)
out[:, :, i, j] = X[:, :, in_i, in_j]
return out.astype(X.dtype)
......@@ -59,7 +72,8 @@ class TestNearestInterpOp(OpTest):
input_np = np.random.random(self.input_shape).astype("float32")
output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w,
self.out_size, self.actual_shape)
self.out_size, self.actual_shape,
self.align_corners)
self.inputs = {'X': input_np}
if self.out_size is not None:
self.inputs['OutSize'] = self.out_size
......@@ -68,7 +82,8 @@ class TestNearestInterpOp(OpTest):
self.attrs = {
'out_h': self.out_h,
'out_w': self.out_w,
'interp_method': self.interp_method
'interp_method': self.interp_method,
'align_corners': self.align_corners,
}
self.outputs = {'Out': output_np}
......@@ -84,6 +99,7 @@ class TestNearestInterpOp(OpTest):
self.out_h = 2
self.out_w = 2
self.out_size = np.array([3, 3]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpCase1(TestNearestInterpOp):
......@@ -92,6 +108,7 @@ class TestNearestNeighborInterpCase1(TestNearestInterpOp):
self.input_shape = [4, 1, 7, 8]
self.out_h = 1
self.out_w = 1
self.align_corners = False
class TestNearestNeighborInterpCase2(TestNearestInterpOp):
......@@ -100,6 +117,7 @@ class TestNearestNeighborInterpCase2(TestNearestInterpOp):
self.input_shape = [3, 3, 9, 6]
self.out_h = 12
self.out_w = 12
self.align_corners = True
class TestNearestNeighborInterpCase3(TestNearestInterpOp):
......@@ -108,6 +126,7 @@ class TestNearestNeighborInterpCase3(TestNearestInterpOp):
self.input_shape = [1, 1, 128, 64]
self.out_h = 64
self.out_w = 128
self.align_corners = True
class TestNearestNeighborInterpCase4(TestNearestInterpOp):
......@@ -117,6 +136,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpOp):
self.out_h = 1
self.out_w = 1
self.out_size = np.array([2, 2]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpCase5(TestNearestInterpOp):
......@@ -126,6 +146,7 @@ class TestNearestNeighborInterpCase5(TestNearestInterpOp):
self.out_h = 12
self.out_w = 12
self.out_size = np.array([11, 11]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpCase6(TestNearestInterpOp):
......@@ -135,6 +156,7 @@ class TestNearestNeighborInterpCase6(TestNearestInterpOp):
self.out_h = 64
self.out_w = 128
self.out_size = np.array([65, 129]).astype("int32")
self.align_corners = True
class TestNearestNeighborInterpActualShape(TestNearestInterpOp):
......@@ -144,6 +166,7 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp):
self.out_h = 64
self.out_w = 32
self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True
class TestNearestInterpOpUint8(OpTest):
......@@ -155,14 +178,16 @@ class TestNearestInterpOpUint8(OpTest):
input_np = np.random.randint(
low=0, high=256, size=self.input_shape).astype("uint8")
output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w,
self.out_size, self.actual_shape)
self.out_size, self.actual_shape,
self.align_corners)
self.inputs = {'X': input_np}
if self.out_size is not None:
self.inputs['OutSize'] = self.out_size
self.attrs = {
'out_h': self.out_h,
'out_w': self.out_w,
'interp_method': self.interp_method
'interp_method': self.interp_method,
'align_corners': self.align_corners
}
self.outputs = {'Out': output_np}
......@@ -174,6 +199,7 @@ class TestNearestInterpOpUint8(OpTest):
self.input_shape = [1, 3, 9, 6]
self.out_h = 10
self.out_w = 9
self.align_corners = True
class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8):
......@@ -182,6 +208,7 @@ class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8):
self.input_shape = [2, 3, 128, 64]
self.out_h = 120
self.out_w = 50
self.align_corners = False
class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8):
......@@ -191,6 +218,12 @@ class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8):
self.out_h = 5
self.out_w = 13
self.out_size = np.array([6, 15]).astype("int32")
self.align_corners = True
class TestNearestInterpWithoutCorners(TestNearestInterpOp):
    """Nearest-neighbor interp test variant with align_corners = False."""

    # NOTE(review): no call to set_align_corners() is visible in this chunk --
    # confirm that the base test class actually invokes this hook; if it does
    # not, align_corners keeps whatever value the inherited setup assigned.
    def set_align_corners(self):
        """Turn corner alignment off for this variant."""
        self.align_corners = False
if __name__ == "__main__":
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册