From cb8f3c03a7aded3c6926df9826b6bcde76abc2a8 Mon Sep 17 00:00:00 2001 From: Zhang Ting Date: Mon, 23 Sep 2019 21:03:07 +0800 Subject: [PATCH] resize Ops support data_layout:channel_last, test=develop, test=document_preview (#19914) --- paddle/fluid/API.spec | 8 +- paddle/fluid/operators/interpolate_op.cc | 68 ++- paddle/fluid/operators/interpolate_op.cu | 421 +++++++++++++----- paddle/fluid/operators/interpolate_op.h | 283 ++++++++---- python/paddle/fluid/layers/nn.py | 147 +++--- .../unittests/test_bilinear_interp_op.py | 39 +- .../tests/unittests/test_nearest_interp_op.py | 60 ++- .../unittests/test_trilinear_interp_op.py | 74 ++- 8 files changed, 786 insertions(+), 314 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index ae79dc30e8..6ebce107fa 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -194,11 +194,11 @@ paddle.fluid.layers.label_smooth (ArgSpec(args=['label', 'prior_dist', 'epsilon' paddle.fluid.layers.roi_pool (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)), ('document', '49368d724023a66b41b0071be41c0ba5')) paddle.fluid.layers.roi_align (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None)), ('document', '9a7a3b88a4fae41d58d3ca9b10ba0591')) paddle.fluid.layers.dice_loss (ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,)), ('document', '7e8e4bf1f0f8612961ed113e8af8f0c5')) -paddle.fluid.layers.image_resize (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1)), ('document', '0e8567334d72a214c2e3ce0ce19e4d37')) +paddle.fluid.layers.image_resize (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 
'align_corners', 'align_mode', 'data_format'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1, 'NCHW')), ('document', 'd29d829607b5ff12924197a3ba296c89')) paddle.fluid.layers.image_resize_short (ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',)), ('document', 'bd97ebfe4bdf5110a5fcb8ecb626a447')) -paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)), ('document', '0a7b98e57eb74bab6e3c2a95e41298a7')) -paddle.fluid.layers.resize_trilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)), ('document', '6baf2ddf375d3059e5aa74d7fde76517')) -paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True)), ('document', '699bf1de6af91235367e9c7a9a6e252c')) +paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode', 'data_format'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1, 'NCHW')), ('document', '44da7890c8a362a83a1c0902a1dc1e4d')) +paddle.fluid.layers.resize_trilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode', 'data_format'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1, 'NCDHW')), ('document', '5b4d0f823f94c260fe5e6f7eec60a797')) +paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'data_format'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 'NCHW')), ('document', 
'0107a5cbae1aef3f381d3d769a6068eb')) paddle.fluid.layers.gather (ArgSpec(args=['input', 'index', 'overwrite'], varargs=None, keywords=None, defaults=(True,)), ('document', 'f985c9b66e3aec96fa753a8eb44c991c')) paddle.fluid.layers.gather_nd (ArgSpec(args=['input', 'index', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3cc24f9cf135770aa6263dba25b457f9')) paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name', 'overwrite'], varargs=None, keywords=None, defaults=(None, True)), ('document', '69b22affd4a6326502af166f04c095ab')) diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index efe49a3e04..612f770bb7 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -19,6 +19,7 @@ namespace paddle { namespace operators { using framework::Tensor; +using DataLayout = framework::DataLayout; static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { auto dim_x = ctx->GetInputDim("X"); @@ -28,6 +29,8 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { "bilinear" == interp_method || "nearest" == interp_method, "Interpolation method can only be \"bilinear\" or \"nearest\" when " "Input(X) dimension is 4"); + const DataLayout data_layout = framework::StringToDataLayout( + ctx->Attrs().Get("data_layout")); if (ctx->HasInputs("SizeTensor")) { // top prority size @@ -38,8 +41,13 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { "Attr(out_shape)'s length must be 2 for 4-D input tensor."); int out_h = ctx->Attrs().Get("out_h"); int out_w = ctx->Attrs().Get("out_w"); - std::vector dim_out({dim_x[0], dim_x[1], out_h, out_w}); - ctx->SetOutputDim("Out", framework::make_ddim(dim_out)); + framework::DDim dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {dim_x[0], dim_x[1], out_h, out_w}; + } else { + dim_out = {dim_x[0], out_h, out_w, dim_x[3]}; + } + 
ctx->SetOutputDim("Out", dim_out); return; } @@ -55,8 +63,12 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { float scale = ctx->Attrs().Get("scale"); if (scale > 0) { // round down - out_h = static_cast(dim_x[2] * scale); - out_w = static_cast(dim_x[3] * scale); + out_h = (data_layout == DataLayout::kNCHW + ? static_cast(dim_x[2] * scale) + : static_cast(dim_x[1] * scale)); + out_w = (data_layout == DataLayout::kNCHW + ? static_cast(dim_x[3] * scale) + : static_cast(dim_x[2] * scale)); // protect when input shape is -1 out_h = out_h > 0 ? out_h : -1; out_w = out_w > 0 ? out_w : -1; @@ -75,8 +87,13 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { return; } - std::vector dim_out({dim_x[0], dim_x[1], out_h, out_w}); - ctx->SetOutputDim("Out", framework::make_ddim(dim_out)); + framework::DDim dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {dim_x[0], dim_x[1], out_h, out_w}; + } else { + dim_out = {dim_x[0], out_h, out_w, dim_x[3]}; + } + ctx->SetOutputDim("Out", dim_out); } static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { @@ -86,6 +103,8 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE("trilinear" == interp_method, "Interpolation method can only be \"trilinear\" when Input(X) " "dimension is 5"); + const DataLayout data_layout = framework::StringToDataLayout( + ctx->Attrs().Get("data_layout")); if (ctx->HasInputs("SizeTensor")) { // top prority size @@ -97,8 +116,13 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { int out_d = ctx->Attrs().Get("out_d"); int out_h = ctx->Attrs().Get("out_h"); int out_w = ctx->Attrs().Get("out_w"); - std::vector dim_out({dim_x[0], dim_x[1], out_d, out_h, out_w}); - ctx->SetOutputDim("Out", framework::make_ddim(dim_out)); + framework::DDim dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {dim_x[0], dim_x[1], out_d, out_h, out_w}; + } else { 
+ dim_out = {dim_x[0], out_d, out_h, out_w, dim_x[4]}; + } + ctx->SetOutputDim("Out", dim_out); return; } @@ -115,9 +139,15 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { float scale = ctx->Attrs().Get("scale"); if (scale > 0) { // round down - out_d = static_cast(dim_x[2] * scale); - out_h = static_cast(dim_x[3] * scale); - out_w = static_cast(dim_x[4] * scale); + out_d = (data_layout == DataLayout::kNCHW + ? static_cast(dim_x[2] * scale) + : static_cast(dim_x[1] * scale)); + out_h = (data_layout == DataLayout::kNCHW + ? static_cast(dim_x[3] * scale) + : static_cast(dim_x[2] * scale)); + out_w = (data_layout == DataLayout::kNCHW + ? static_cast(dim_x[4] * scale) + : static_cast(dim_x[3] * scale)); // protect when input shape is -1 out_d = out_d > 0 ? out_d : -1; out_h = out_h > 0 ? out_h : -1; @@ -138,8 +168,13 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { return; } - std::vector dim_out({dim_x[0], dim_x[1], out_d, out_h, out_w}); - ctx->SetOutputDim("Out", framework::make_ddim(dim_out)); + framework::DDim dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {dim_x[0], dim_x[1], out_d, out_h, out_w}; + } else { + dim_out = {dim_x[0], out_d, out_h, out_w, dim_x[4]}; + } + ctx->SetOutputDim("Out", dim_out); } class InterpolateOp : public framework::OperatorWithKernel { @@ -213,6 +248,13 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker { "The output tensor of interpolate operator, " "This is a tensor in same rank with Input(X)."); + AddAttr( + "data_layout", + "(string, default NCHW) Only used in " + "an optional string from: \"NHWC\", \"NCHW\". 
" + "Specify that the data format of the input and output data is " + "channel_first or channel_last.") + .SetDefault("NCHW"); AddAttr("out_d", "output depth of interpolate op.").SetDefault(0); AddAttr("out_h", "output height of interpolate op.").SetDefault(0); AddAttr("out_w", "output width of interpolate op.").SetDefault(0); diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index 45e606f388..6121389c12 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -17,6 +17,7 @@ namespace paddle { namespace operators { using framework::Tensor; +using DataLayout = framework::DataLayout; template __global__ void KeNearestNeighborInterpFw( @@ -24,7 +25,7 @@ __global__ void KeNearestNeighborInterpFw( const size_t input_h, const size_t input_w, T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, const size_t num_channels, const float ratio_h, const float ratio_w, - const bool align_corners) { + const bool align_corners, const DataLayout data_layout) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -33,20 +34,32 @@ __global__ void KeNearestNeighborInterpFw( int out_id_w = tid % output_w; int in_img_size = input_w / num_channels; int out_img_size = output_w / num_channels; - int channel_id = out_id_w / out_img_size; - int out_img_idy = (out_id_w % out_img_size) / out_img_w; + int channel_id, out_img_idy, out_img_idx; + if (data_layout == DataLayout::kNCHW) { + channel_id = out_id_w / out_img_size; + out_img_idy = (out_id_w % out_img_size) / out_img_w; + out_img_idx = tid % out_img_w; + } else { + out_img_idy = out_id_w / (out_img_w * num_channels); + out_img_idx = out_id_w % (out_img_w * num_channels) / num_channels; + channel_id = tid % num_channels; + } + int in_img_idy = (align_corners) ? 
static_cast(ratio_h * out_img_idy + 0.5) : static_cast(ratio_h * out_img_idy); - - int out_img_idx = tid % out_img_w; int in_img_idx = (align_corners) ? static_cast(ratio_w * out_img_idx + 0.5) : static_cast(ratio_w * out_img_idx); - out[tid] = in[out_id_h * input_w + channel_id * in_img_size + - in_img_idy * in_img_w + in_img_idx]; + if (data_layout == DataLayout::kNCHW) { + out[tid] = in[out_id_h * input_w + channel_id * in_img_size + + in_img_idy * in_img_w + in_img_idx]; + } else { + out[tid] = in[out_id_h * input_w + in_img_idy * in_img_w * num_channels + + in_img_idx * num_channels + channel_id]; + } } } @@ -56,7 +69,7 @@ __global__ void KeNearestNeighborInterpBw( const size_t input_w, const T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, const size_t num_channels, const float ratio_h, const float ratio_w, - const bool align_corners) { + const bool align_corners, const DataLayout data_layout) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -65,20 +78,33 @@ __global__ void KeNearestNeighborInterpBw( int out_id_w = tid % output_w; int in_img_size = input_w / num_channels; int out_img_size = output_w / num_channels; - int channel_id = out_id_w / out_img_size; - int out_img_idy = (out_id_w % out_img_size) / out_img_w; + int channel_id, out_img_idy, out_img_idx; + if (data_layout == DataLayout::kNCHW) { + channel_id = out_id_w / out_img_size; + out_img_idy = (out_id_w % out_img_size) / out_img_w; + out_img_idx = tid % out_img_w; + } else { + out_img_idy = out_id_w / (out_img_w * num_channels); + out_img_idx = out_id_w % (out_img_w * num_channels) / num_channels; + channel_id = tid % num_channels; + } + int in_img_idy = (align_corners) ? static_cast(ratio_h * out_img_idy + 0.5) : static_cast(ratio_h * out_img_idy); - - int out_img_idx = tid % out_img_w; int in_img_idx = (align_corners) ? 
static_cast(ratio_w * out_img_idx + 0.5) : static_cast(ratio_w * out_img_idx); - T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size + - in_img_idy * in_img_w + in_img_idx]; + T* in_pos; + if (data_layout == DataLayout::kNCHW) { + in_pos = &in[out_id_h * input_w + channel_id * in_img_size + + in_img_idy * in_img_w + in_img_idx]; + } else { + in_pos = &in[out_id_h * input_w + in_img_idy * in_img_w * num_channels + + in_img_idx * num_channels + channel_id]; + } const T out_pos = out[out_id_h * output_w + out_id_w]; platform::CudaAtomicAdd(in_pos, out_pos); } @@ -90,7 +116,8 @@ __global__ void KeBilinearInterpFw( const size_t input_h, const size_t input_w, T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, const size_t num_channels, const float ratio_h, const float ratio_w, - const bool align_corners, const int align_mode) { + const bool align_corners, const int align_mode, + const DataLayout data_layout) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -100,9 +127,18 @@ __global__ void KeBilinearInterpFw( int out_id_w = tid % output_w; int in_img_size = input_w / num_channels; int out_img_size = output_w / num_channels; - int channel_id = out_id_w / out_img_size; - int out_img_idy = (out_id_w % out_img_size) / out_img_w; + int channel_id, out_img_idy, out_img_idx; + if (data_layout == DataLayout::kNCHW) { + channel_id = out_id_w / out_img_size; + out_img_idy = (out_id_w % out_img_size) / out_img_w; + out_img_idx = tid % out_img_w; + } else { + out_img_idy = out_id_w / (out_img_w * num_channels); + out_img_idx = out_id_w % (out_img_w * num_channels) / num_channels; + channel_id = tid % num_channels; + } + int in_img_idy = align_flag ? static_cast(ratio_h * (out_img_idy + 0.5) - 0.5) : static_cast(ratio_h * out_img_idy); @@ -114,7 +150,6 @@ __global__ void KeBilinearInterpFw( align_flag ? 
src_h - in_img_idy : ratio_h * out_img_idy - in_img_idy; T h2lambda = 1.f - h1lambda; - int out_img_idx = tid % out_img_w; int in_img_idx = align_flag ? static_cast(ratio_w * (out_img_idx + 0.5) - 0.5) : static_cast(ratio_w * out_img_idx); @@ -126,14 +161,28 @@ __global__ void KeBilinearInterpFw( align_flag ? src_w - in_img_idx : ratio_w * out_img_idx - in_img_idx; T w2lambda = 1.f - w1lambda; - const T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size + - in_img_idy * in_img_w + in_img_idx]; + if (data_layout == DataLayout::kNCHW) { + const T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size + + in_img_idy * in_img_w + in_img_idx]; - // bilinear interpolation - out[out_id_h * output_w + out_id_w] = - h2lambda * (w2lambda * in_pos[0] + w1lambda * in_pos[w_id]) + - h1lambda * (w2lambda * in_pos[h_id * in_img_w] + - w1lambda * in_pos[h_id * in_img_w + w_id]); + // bilinear interpolation + out[out_id_h * output_w + out_id_w] = + h2lambda * (w2lambda * in_pos[0] + w1lambda * in_pos[w_id]) + + h1lambda * (w2lambda * in_pos[h_id * in_img_w] + + w1lambda * in_pos[h_id * in_img_w + w_id]); + } else { + const T* in_pos = + &in[out_id_h * input_w + in_img_idy * in_img_w * num_channels + + in_img_idx * num_channels + channel_id]; + + // bilinear interpolation + out[out_id_h * output_w + out_id_w] = + h2lambda * + (w2lambda * in_pos[0] + w1lambda * in_pos[w_id * num_channels]) + + h1lambda * (w2lambda * in_pos[h_id * in_img_w * num_channels] + + w1lambda * in_pos[h_id * in_img_w * num_channels + + w_id * num_channels]); + } } } @@ -143,7 +192,8 @@ __global__ void KeBilinearInterpBw( const size_t input_w, const T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, const size_t num_channels, const T ratio_h, const T ratio_w, - const bool align_corners, const int align_mode) { + const bool align_corners, const int align_mode, + const DataLayout data_layout) { int nthreads = output_h * output_w; int tid = 
blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -153,9 +203,18 @@ __global__ void KeBilinearInterpBw( int out_id_w = tid % output_w; int in_img_size = input_w / num_channels; int out_img_size = output_w / num_channels; - int channel_id = out_id_w / out_img_size; - int out_img_idy = (out_id_w % out_img_size) / out_img_w; + int channel_id, out_img_idy, out_img_idx; + if (data_layout == DataLayout::kNCHW) { + channel_id = out_id_w / out_img_size; + out_img_idy = (out_id_w % out_img_size) / out_img_w; + out_img_idx = tid % out_img_w; + } else { + out_img_idy = out_id_w / (out_img_w * num_channels); + out_img_idx = out_id_w % (out_img_w * num_channels) / num_channels; + channel_id = tid % num_channels; + } + int in_img_idy = align_flag ? ratio_h * (out_img_idy + 0.5) - 0.5 : ratio_h * out_img_idy; in_img_idy = (in_img_idy > 0) ? in_img_idy : 0; @@ -164,10 +223,8 @@ __global__ void KeBilinearInterpBw( src_h = (src_h > 0) ? src_h : 0; T h1lambda = align_flag ? src_h - in_img_idy : ratio_h * out_img_idy - in_img_idy; - T h2lambda = 1.f - h1lambda; - int out_img_idx = tid % out_img_w; int in_img_idx = align_flag ? ratio_w * (out_img_idx + 0.5) - 0.5 : ratio_w * out_img_idx; in_img_idx = (in_img_idx > 0) ? in_img_idx : 0; @@ -178,15 +235,34 @@ __global__ void KeBilinearInterpBw( align_flag ? 
src_w - in_img_idx : ratio_w * out_img_idx - in_img_idx; T w2lambda = 1.f - w1lambda; - T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size + - in_img_idy * in_img_w + in_img_idx]; + T* in_pos; + if (data_layout == DataLayout::kNCHW) { + in_pos = &in[out_id_h * input_w + channel_id * in_img_size + + in_img_idy * in_img_w + in_img_idx]; + } else { + in_pos = &in[out_id_h * input_w + in_img_idy * in_img_w * num_channels + + in_img_idx * num_channels + channel_id]; + } + const T* out_pos = &out[out_id_h * output_w + out_id_w]; - platform::CudaAtomicAdd(&in_pos[0], h2lambda * w2lambda * out_pos[0]); - platform::CudaAtomicAdd(&in_pos[w_id], h2lambda * w1lambda * out_pos[0]); - platform::CudaAtomicAdd(&in_pos[h_id * in_img_w], - h1lambda * w2lambda * out_pos[0]); - platform::CudaAtomicAdd(&in_pos[h_id * in_img_w + w_id], - h1lambda * w1lambda * out_pos[0]); + + if (data_layout == DataLayout::kNCHW) { + platform::CudaAtomicAdd(&in_pos[0], h2lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos[w_id], h2lambda * w1lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos[h_id * in_img_w], + h1lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos[h_id * in_img_w + w_id], + h1lambda * w1lambda * out_pos[0]); + } else { + platform::CudaAtomicAdd(&in_pos[0], h2lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos[w_id * num_channels], + h2lambda * w1lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos[h_id * in_img_w * num_channels], + h1lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd( + &in_pos[h_id * in_img_w * num_channels + w_id * num_channels], + h1lambda * w1lambda * out_pos[0]); + } } } @@ -197,7 +273,8 @@ __global__ void KeTrilinearInterpFw( const size_t out_img_d, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, const size_t num_channels, const float ratio_d, const float ratio_h, const float ratio_w, - const bool align_corners, const int align_mode) { + const bool 
align_corners, const int align_mode, + const DataLayout data_layout) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -207,9 +284,21 @@ __global__ void KeTrilinearInterpFw( int out_id_w = tid % output_w; int in_img_size = input_w / num_channels; int out_img_size = output_w / num_channels; - int channel_id = out_id_w / out_img_size; - int out_img_idt = (out_id_w % out_img_size) / out_img_h / out_img_w; + int channel_id, out_img_idt, out_img_idy, out_img_idx; + if (data_layout == DataLayout::kNCHW) { + channel_id = out_id_w / out_img_size; + out_img_idt = (out_id_w % out_img_size) / out_img_h / out_img_w; + out_img_idy = ((out_id_w % out_img_size) / out_img_w) % out_img_h; + out_img_idx = tid % out_img_w; + } else { + out_img_idt = out_id_w / (out_img_h * out_img_w * num_channels); + out_img_idy = out_id_w % (out_img_h * out_img_w * num_channels) / + (out_img_w * num_channels); + out_img_idx = out_id_w % (out_img_w * num_channels) / num_channels; + channel_id = tid % num_channels; + } + int in_img_idt = align_flag ? static_cast(ratio_d * (out_img_idt + 0.5) - 0.5) : static_cast(ratio_d * out_img_idt); @@ -221,7 +310,6 @@ __global__ void KeTrilinearInterpFw( align_flag ? src_d - in_img_idt : ratio_d * out_img_idt - in_img_idt; T d2lambda = 1.f - d1lambda; - int out_img_idy = ((out_id_w % out_img_size) / out_img_w) % out_img_h; int in_img_idy = align_flag ? static_cast(ratio_h * (out_img_idy + 0.5) - 0.5) : static_cast(ratio_h * out_img_idy); @@ -233,7 +321,6 @@ __global__ void KeTrilinearInterpFw( align_flag ? src_h - in_img_idy : ratio_h * out_img_idy - in_img_idy; T h2lambda = 1.f - h1lambda; - int out_img_idx = tid % out_img_w; int in_img_idx = align_flag ? static_cast(ratio_w * (out_img_idx + 0.5) - 0.5) : static_cast(ratio_w * out_img_idx); @@ -245,23 +332,49 @@ __global__ void KeTrilinearInterpFw( align_flag ? 
src_w - in_img_idx : ratio_w * out_img_idx - in_img_idx; T w2lambda = 1.f - w1lambda; - int in_pos1_idx = out_id_h * input_w + channel_id * in_img_size + - (in_img_idt * in_img_h + in_img_idy) * in_img_w + - in_img_idx; - const T* in_pos1 = &in[in_pos1_idx]; - int in_pos2_idx = in_pos1_idx + d_id * in_img_h * in_img_w; - const T* in_pos2 = &in[in_pos2_idx]; - - // trilinear interpolation - out[out_id_h * output_w + out_id_w] = - d2lambda * - (h2lambda * (w2lambda * in_pos1[0] + w1lambda * in_pos1[w_id]) + - h1lambda * (w2lambda * in_pos1[h_id * in_img_w] + - w1lambda * in_pos1[h_id * in_img_w + w_id])) + - d1lambda * - (h2lambda * (w2lambda * in_pos2[0] + w1lambda * in_pos2[w_id]) + - h1lambda * (w2lambda * in_pos2[h_id * in_img_w] + - w1lambda * in_pos2[h_id * in_img_w + w_id])); + if (data_layout == DataLayout::kNCHW) { + int in_pos1_idx = out_id_h * input_w + channel_id * in_img_size + + (in_img_idt * in_img_h + in_img_idy) * in_img_w + + in_img_idx; + const T* in_pos1 = &in[in_pos1_idx]; + int in_pos2_idx = in_pos1_idx + d_id * in_img_h * in_img_w; + const T* in_pos2 = &in[in_pos2_idx]; + + // trilinear interpolation + out[out_id_h * output_w + out_id_w] = + d2lambda * + (h2lambda * (w2lambda * in_pos1[0] + w1lambda * in_pos1[w_id]) + + h1lambda * (w2lambda * in_pos1[h_id * in_img_w] + + w1lambda * in_pos1[h_id * in_img_w + w_id])) + + d1lambda * + (h2lambda * (w2lambda * in_pos2[0] + w1lambda * in_pos2[w_id]) + + h1lambda * (w2lambda * in_pos2[h_id * in_img_w] + + w1lambda * in_pos2[h_id * in_img_w + w_id])); + + } else { + int in_pos1_idx = out_id_h * input_w + + in_img_idt * in_img_h * in_img_w * num_channels + + in_img_idy * in_img_w * num_channels + + in_img_idx * num_channels + channel_id; + const T* in_pos1 = &in[in_pos1_idx]; + int in_pos2_idx = in_pos1_idx + d_id * in_img_h * in_img_w * num_channels; + const T* in_pos2 = &in[in_pos2_idx]; + + // trilinear interpolation + out[out_id_h * output_w + out_id_w] = + d2lambda * + (h2lambda * (w2lambda * 
in_pos1[0] + + w1lambda * in_pos1[w_id * num_channels]) + + h1lambda * (w2lambda * in_pos1[h_id * in_img_w * num_channels] + + w1lambda * in_pos1[h_id * in_img_w * num_channels + + w_id * num_channels])) + + d1lambda * + (h2lambda * (w2lambda * in_pos2[0] + + w1lambda * in_pos2[w_id * num_channels]) + + h1lambda * (w2lambda * in_pos2[h_id * in_img_w * num_channels] + + w1lambda * in_pos2[h_id * in_img_w * num_channels + + w_id * num_channels])); + } } } @@ -272,7 +385,7 @@ __global__ void KeTrilinearInterpBw( const size_t out_img_d, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, const size_t num_channels, const T ratio_d, const T ratio_h, const T ratio_w, const bool align_corners, - const int align_mode) { + const int align_mode, const DataLayout data_layout) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -282,9 +395,21 @@ __global__ void KeTrilinearInterpBw( int out_id_w = tid % output_w; int in_img_size = input_w / num_channels; int out_img_size = output_w / num_channels; - int channel_id = out_id_w / out_img_size; - int out_img_idt = (out_id_w % out_img_size) / out_img_h / out_img_w; + int channel_id, out_img_idt, out_img_idy, out_img_idx; + if (data_layout == DataLayout::kNCHW) { + channel_id = out_id_w / out_img_size; + out_img_idt = (out_id_w % out_img_size) / out_img_h / out_img_w; + out_img_idy = ((out_id_w % out_img_size) / out_img_w) % out_img_h; + out_img_idx = tid % out_img_w; + } else { + out_img_idt = out_id_w / (out_img_h * out_img_w * num_channels); + out_img_idy = out_id_w % (out_img_h * out_img_w * num_channels) / + (out_img_w * num_channels); + out_img_idx = out_id_w % (out_img_w * num_channels) / num_channels; + channel_id = tid % num_channels; + } + int in_img_idt = align_flag ? 
static_cast(ratio_d * (out_img_idt + 0.5) - 0.5) : static_cast(ratio_d * out_img_idt); @@ -296,7 +421,6 @@ __global__ void KeTrilinearInterpBw( align_flag ? src_d - in_img_idt : ratio_d * out_img_idt - in_img_idt; T d2lambda = 1.f - d1lambda; - int out_img_idy = ((out_id_w % out_img_size) / out_img_w) % out_img_h; int in_img_idy = align_flag ? static_cast(ratio_h * (out_img_idy + 0.5) - 0.5) : static_cast(ratio_h * out_img_idy); @@ -308,7 +432,6 @@ __global__ void KeTrilinearInterpBw( align_flag ? src_h - in_img_idy : ratio_h * out_img_idy - in_img_idy; T h2lambda = 1.f - h1lambda; - int out_img_idx = tid % out_img_w; int in_img_idx = align_flag ? static_cast(ratio_w * (out_img_idx + 0.5) - 0.5) : static_cast(ratio_w * out_img_idx); @@ -320,32 +443,64 @@ __global__ void KeTrilinearInterpBw( align_flag ? src_w - in_img_idx : ratio_w * out_img_idx - in_img_idx; T w2lambda = 1.f - w1lambda; - int in_pos1_idx = out_id_h * input_w + channel_id * in_img_size + - (in_img_idt * in_img_h + in_img_idy) * in_img_w + - in_img_idx; - T* in_pos1 = &in[in_pos1_idx]; - int in_pos2_idx = in_pos1_idx + d_id * in_img_h * in_img_w; - T* in_pos2 = &in[in_pos2_idx]; - - const T* out_pos = &out[out_id_h * output_w + out_id_w]; - - // trilinear interpolation grad - platform::CudaAtomicAdd(&in_pos1[0], - d2lambda * h2lambda * w2lambda * out_pos[0]); - platform::CudaAtomicAdd(&in_pos1[w_id], - d2lambda * h2lambda * w1lambda * out_pos[0]); - platform::CudaAtomicAdd(&in_pos1[h_id * in_img_w], - d2lambda * h1lambda * w2lambda * out_pos[0]); - platform::CudaAtomicAdd(&in_pos1[h_id * in_img_w + w_id], - d2lambda * h1lambda * w1lambda * out_pos[0]); - platform::CudaAtomicAdd(&in_pos2[0], - d1lambda * h2lambda * w2lambda * out_pos[0]); - platform::CudaAtomicAdd(&in_pos2[w_id], - d1lambda * h2lambda * w1lambda * out_pos[0]); - platform::CudaAtomicAdd(&in_pos2[h_id * in_img_w], - d1lambda * h1lambda * w2lambda * out_pos[0]); - platform::CudaAtomicAdd(&in_pos2[h_id * in_img_w + w_id], - d1lambda * 
h1lambda * w1lambda * out_pos[0]); + if (data_layout == DataLayout::kNCHW) { + int in_pos1_idx = out_id_h * input_w + channel_id * in_img_size + + (in_img_idt * in_img_h + in_img_idy) * in_img_w + + in_img_idx; + T* in_pos1 = &in[in_pos1_idx]; + int in_pos2_idx = in_pos1_idx + d_id * in_img_h * in_img_w; + T* in_pos2 = &in[in_pos2_idx]; + + const T* out_pos = &out[out_id_h * output_w + out_id_w]; + + // trilinear interpolation grad + platform::CudaAtomicAdd(&in_pos1[0], + d2lambda * h2lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos1[w_id], + d2lambda * h2lambda * w1lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos1[h_id * in_img_w], + d2lambda * h1lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos1[h_id * in_img_w + w_id], + d2lambda * h1lambda * w1lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos2[0], + d1lambda * h2lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos2[w_id], + d1lambda * h2lambda * w1lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos2[h_id * in_img_w], + d1lambda * h1lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos2[h_id * in_img_w + w_id], + d1lambda * h1lambda * w1lambda * out_pos[0]); + } else { + int in_pos1_idx = out_id_h * input_w + + in_img_idt * in_img_h * in_img_w * num_channels + + in_img_idy * in_img_w * num_channels + + in_img_idx * num_channels + channel_id; + T* in_pos1 = &in[in_pos1_idx]; + int in_pos2_idx = in_pos1_idx + d_id * in_img_h * in_img_w * num_channels; + T* in_pos2 = &in[in_pos2_idx]; + + const T* out_pos = &out[out_id_h * output_w + out_id_w]; + + // trilinear interpolation grad + platform::CudaAtomicAdd(&in_pos1[0], + d2lambda * h2lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos1[w_id * num_channels], + d2lambda * h2lambda * w1lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos1[h_id * in_img_w * num_channels], + d2lambda * h1lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd( + &in_pos1[h_id * in_img_w * 
num_channels + w_id * num_channels], + d2lambda * h1lambda * w1lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos2[0], + d1lambda * h2lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos2[w_id * num_channels], + d1lambda * h2lambda * w1lambda * out_pos[0]); + platform::CudaAtomicAdd(&in_pos2[h_id * in_img_w * num_channels], + d1lambda * h1lambda * w2lambda * out_pos[0]); + platform::CudaAtomicAdd( + &in_pos2[h_id * in_img_w * num_channels + w_id * num_channels], + d1lambda * h1lambda * w1lambda * out_pos[0]); + } } } @@ -354,10 +509,10 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, const Tensor& input, Tensor* output) { auto* input_data = input.data(); - const int n = input.dims()[0]; - const int c = input.dims()[1]; - const int in_h = input.dims()[2]; - const int in_w = input.dims()[3]; + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); + int n, c, in_d, in_h, in_w; + ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); auto interp_method = ctx.Attr("interp_method"); bool align_corners = ctx.Attr("align_corners"); @@ -401,8 +556,13 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, out_w, 0, "out_w in Attr(out_shape) of Op(interpolate) should be greater than 0."); - auto output_data = - output->mutable_data({n, c, out_h, out_w}, ctx.GetPlace()); + framework::DDim dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {n, c, out_h, out_w}; + } else { + dim_out = {n, out_h, out_w, c}; + } + auto output_data = output->mutable_data(dim_out, ctx.GetPlace()); if (in_h == out_h && in_w == out_w) { framework::TensorCopy(input, ctx.GetPlace(), output); @@ -433,12 +593,12 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, KeNearestNeighborInterpFw< T><<>>( input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, - out_chw, c, ratio_h, ratio_w, 
align_corners); + out_chw, c, ratio_h, ratio_w, align_corners, data_layout); } else if ("bilinear" == interp_method) { KeBilinearInterpFw< T><<>>( input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, - out_chw, c, ratio_h, ratio_w, align_corners, align_mode); + out_chw, c, ratio_h, ratio_w, align_corners, align_mode, data_layout); } } @@ -447,11 +607,10 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, const Tensor& input, Tensor* output) { auto* input_data = input.data(); - const int n = input.dims()[0]; - const int c = input.dims()[1]; - const int in_d = input.dims()[2]; - const int in_h = input.dims()[3]; - const int in_w = input.dims()[4]; + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); + int n, c, in_d, in_h, in_w; + ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); auto interp_method = ctx.Attr("interp_method"); bool align_corners = ctx.Attr("align_corners"); @@ -502,8 +661,13 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, out_w, 0, "out_w in Attr(out_shape) of Op(interpolate) should be greater than 0."); - auto output_data = - output->mutable_data({n, c, out_d, out_h, out_w}, ctx.GetPlace()); + framework::DDim dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {n, c, out_d, out_h, out_w}; + } else { + dim_out = {n, out_d, out_h, out_w, c}; + } + auto output_data = output->mutable_data(dim_out, ctx.GetPlace()); if (in_d == out_d && in_h == out_h && in_w == out_w) { framework::TensorCopy(input, ctx.GetPlace(), output); @@ -540,7 +704,7 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, T><<>>( input_data, in_d, in_h, in_w, n, in_cdhw, output_data, out_d, out_h, out_w, n, out_cdhw, c, ratio_d, ratio_h, ratio_w, align_corners, - align_mode); + align_mode, data_layout); } } @@ -548,10 +712,10 @@ template static void Interpolate2DCUDABwd(const 
framework::ExecutionContext& ctx, Tensor* input_grad, const Tensor output_grad) { auto* input = ctx.Input("X"); - const int n = input->dims()[0]; - const int c = input->dims()[1]; - const int in_h = input->dims()[2]; - const int in_w = input->dims()[3]; + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); + int n, c, in_d, in_h, in_w; + ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); auto interp_method = ctx.Attr("interp_method"); bool align_corners = ctx.Attr("align_corners"); @@ -589,8 +753,14 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, } auto* output_grad_data = output_grad.data(); - auto* input_grad_data = - input_grad->mutable_data({n, c, in_h, in_w}, ctx.GetPlace()); + framework::DDim dim_grad; + if (data_layout == DataLayout::kNCHW) { + dim_grad = {n, c, in_h, in_w}; + } else { + dim_grad = {n, in_h, in_w, c}; + } + input_grad->mutable_data(dim_grad, ctx.GetPlace()); + auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); math::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); @@ -624,12 +794,13 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, KeNearestNeighborInterpBw< T><<>>( input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, out_w, - n, out_chw, c, ratio_h, ratio_w, align_corners); + n, out_chw, c, ratio_h, ratio_w, align_corners, data_layout); } else if ("bilinear" == interp_method) { KeBilinearInterpBw< T><<>>( input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, out_w, - n, out_chw, c, ratio_h, ratio_w, align_corners, align_mode); + n, out_chw, c, ratio_h, ratio_w, align_corners, align_mode, + data_layout); } } @@ -638,11 +809,10 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, Tensor* input_grad, const Tensor& output_grad) { auto* input = 
ctx.Input("X"); - const int n = input->dims()[0]; - const int c = input->dims()[1]; - const int in_d = input->dims()[2]; - const int in_h = input->dims()[3]; - const int in_w = input->dims()[4]; + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); + int n, c, in_d, in_h, in_w; + ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); auto interp_method = ctx.Attr("interp_method"); bool align_corners = ctx.Attr("align_corners"); @@ -684,8 +854,13 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, } auto* output_grad_data = output_grad.data(); - auto* input_grad_data = - input_grad->mutable_data({n, c, in_d, in_h, in_w}, ctx.GetPlace()); + framework::DDim dim_grad; + if (data_layout == DataLayout::kNCHW) { + dim_grad = {n, c, in_d, in_h, in_w}; + } else { + dim_grad = {n, in_d, in_h, in_w, c}; + } + auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); math::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); @@ -725,7 +900,7 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, T><<>>( input_grad_data, in_d, in_h, in_w, n, in_cdhw, output_grad_data, out_d, out_h, out_w, n, out_cdhw, c, ratio_d, ratio_h, ratio_w, align_corners, - align_mode); + align_mode, data_layout); } } diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h index b87c41256d..b107d1e665 100644 --- a/paddle/fluid/operators/interpolate_op.h +++ b/paddle/fluid/operators/interpolate_op.h @@ -22,6 +22,7 @@ template using EigenTensor = framework::EigenTensor; using Tensor = framework::Tensor; +using DataLayout = framework::DataLayout; inline std::vector get_new_shape( const std::vector& list_new_shape_tensor) { @@ -57,12 +58,30 @@ inline std::vector get_new_data_from_tensor(const Tensor* new_data_tensor) { return vec_new_data; } 
+inline void ExtractNCDWH(const framework::DDim& dims, + const DataLayout& data_layout, int* N, int* C, int* D, + int* H, int* W) { + *N = dims[0]; + if (dims.size() == 4) { + *C = data_layout == DataLayout::kNCHW ? dims[1] : dims[3]; + *D = 1; + *H = data_layout == DataLayout::kNCHW ? dims[2] : dims[1]; + *W = data_layout == DataLayout::kNCHW ? dims[3] : dims[2]; + } else { + *C = data_layout == DataLayout::kNCHW ? dims[1] : dims[4]; + *D = data_layout == DataLayout::kNCHW ? dims[2] : dims[1]; + *H = data_layout == DataLayout::kNCHW ? dims[3] : dims[2]; + *W = data_layout == DataLayout::kNCHW ? dims[4] : dims[3]; + } +} + template static void NearestNeighborInterpolate(const Tensor& input, Tensor* output, const float ratio_h, const float ratio_w, const int n, const int c, const int out_h, const int out_w, - const bool align_corners) { + const bool align_corners, + const DataLayout& data_layout) { auto input_t = EigenTensor::From(input); auto output_t = EigenTensor::From(*output); for (int k = 0; k < out_h; k++) { // loop for images @@ -75,7 +94,11 @@ static void NearestNeighborInterpolate(const Tensor& input, Tensor* output, for (int i = 0; i < n; i++) { // loop for batches for (int j = 0; j < c; j++) { // loop for channels - output_t(i, j, k, l) = input_t(i, j, in_k, in_l); + if (data_layout == DataLayout::kNCHW) { + output_t(i, j, k, l) = input_t(i, j, in_k, in_l); + } else { + output_t(i, k, l, j) = input_t(i, in_k, in_l, j); + } } } } @@ -88,7 +111,8 @@ static void BilinearInterpolation(const Tensor& input, Tensor* output, const int in_h, const int in_w, const int n, const int c, const int out_h, const int out_w, const bool align_corners, - const bool align_mode) { + const bool align_mode, + const DataLayout data_layout) { auto input_t = EigenTensor::From(input); auto output_t = EigenTensor::From(*output); bool align_flag = (align_mode == 0 && !align_corners); @@ -154,11 +178,21 @@ static void BilinearInterpolation(const Tensor& input, Tensor* output, for (int 
k = 0; k < out_h; k++) { // loop for images for (int l = 0; l < out_w; l++) { // bilinear interpolation - T out_t = input_t(i, j, vy_n[k], vx_w[l]) * vd_s[k] * vd_e[l] + + T out_t; + if (data_layout == DataLayout::kNCHW) { + out_t = input_t(i, j, vy_n[k], vx_w[l]) * vd_s[k] * vd_e[l] + input_t(i, j, vy_s[k], vx_w[l]) * vd_n[k] * vd_e[l] + input_t(i, j, vy_n[k], vx_e[l]) * vd_s[k] * vd_w[l] + input_t(i, j, vy_s[k], vx_e[l]) * vd_n[k] * vd_w[l]; - output_t(i, j, k, l) = out_t; + output_t(i, j, k, l) = out_t; + + } else { + out_t = input_t(i, vy_n[k], vx_w[l], j) * vd_s[k] * vd_e[l] + + input_t(i, vy_s[k], vx_w[l], j) * vd_n[k] * vd_e[l] + + input_t(i, vy_n[k], vx_e[l], j) * vd_s[k] * vd_w[l] + + input_t(i, vy_s[k], vx_e[l], j) * vd_n[k] * vd_w[l]; + output_t(i, k, l, j) = out_t; + } } } } @@ -170,7 +204,8 @@ static void TrilinearInterpolation( const Tensor& input, Tensor* output, const float ratio_d, const float ratio_h, const float ratio_w, const int in_d, const int in_h, const int in_w, const int n, const int c, const int out_d, const int out_h, - const int out_w, const bool align_corners, const bool align_mode) { + const int out_w, const bool align_corners, const bool align_mode, + const DataLayout& data_layout) { auto input_t = EigenTensor::From(input); auto output_t = EigenTensor::From(*output); bool align_flag = (align_mode == 0 && !align_corners); @@ -263,23 +298,43 @@ static void TrilinearInterpolation( for (int k = 0; k < out_h; k++) { for (int l = 0; l < out_w; l++) { // trilinear interpolation - T out_t = input_t(b, i, vt_f[j], vy_n[k], vx_w[l]) * vd_b[j] * - vd_s[k] * vd_e[l] + - input_t(b, i, vt_f[j], vy_n[k], vx_e[l]) * vd_b[j] * - vd_s[k] * vd_w[l] + - input_t(b, i, vt_f[j], vy_s[k], vx_w[l]) * vd_b[j] * - vd_n[k] * vd_e[l] + - input_t(b, i, vt_f[j], vy_s[k], vx_e[l]) * vd_b[j] * - vd_n[k] * vd_w[l] + - input_t(b, i, vt_b[j], vy_n[k], vx_w[l]) * vd_f[j] * - vd_s[k] * vd_e[l] + - input_t(b, i, vt_b[j], vy_n[k], vx_e[l]) * vd_f[j] * - vd_s[k] * vd_w[l] + 
- input_t(b, i, vt_b[j], vy_s[k], vx_w[l]) * vd_f[j] * - vd_n[k] * vd_e[l] + - input_t(b, i, vt_b[j], vy_s[k], vx_e[l]) * vd_f[j] * - vd_n[k] * vd_w[l]; - output_t(b, i, j, k, l) = out_t; + if (data_layout == DataLayout::kNCHW) { + T out_t = input_t(b, i, vt_f[j], vy_n[k], vx_w[l]) * vd_b[j] * + vd_s[k] * vd_e[l] + + input_t(b, i, vt_f[j], vy_n[k], vx_e[l]) * vd_b[j] * + vd_s[k] * vd_w[l] + + input_t(b, i, vt_f[j], vy_s[k], vx_w[l]) * vd_b[j] * + vd_n[k] * vd_e[l] + + input_t(b, i, vt_f[j], vy_s[k], vx_e[l]) * vd_b[j] * + vd_n[k] * vd_w[l] + + input_t(b, i, vt_b[j], vy_n[k], vx_w[l]) * vd_f[j] * + vd_s[k] * vd_e[l] + + input_t(b, i, vt_b[j], vy_n[k], vx_e[l]) * vd_f[j] * + vd_s[k] * vd_w[l] + + input_t(b, i, vt_b[j], vy_s[k], vx_w[l]) * vd_f[j] * + vd_n[k] * vd_e[l] + + input_t(b, i, vt_b[j], vy_s[k], vx_e[l]) * vd_f[j] * + vd_n[k] * vd_w[l]; + output_t(b, i, j, k, l) = out_t; + } else { + T out_t = input_t(b, vt_f[j], vy_n[k], vx_w[l], i) * vd_b[j] * + vd_s[k] * vd_e[l] + + input_t(b, vt_f[j], vy_n[k], vx_e[l], i) * vd_b[j] * + vd_s[k] * vd_w[l] + + input_t(b, vt_f[j], vy_s[k], vx_w[l], i) * vd_b[j] * + vd_n[k] * vd_e[l] + + input_t(b, vt_f[j], vy_s[k], vx_e[l], i) * vd_b[j] * + vd_n[k] * vd_w[l] + + input_t(b, vt_b[j], vy_n[k], vx_w[l], i) * vd_f[j] * + vd_s[k] * vd_e[l] + + input_t(b, vt_b[j], vy_n[k], vx_e[l], i) * vd_f[j] * + vd_s[k] * vd_w[l] + + input_t(b, vt_b[j], vy_s[k], vx_w[l], i) * vd_f[j] * + vd_n[k] * vd_e[l] + + input_t(b, vt_b[j], vy_s[k], vx_e[l], i) * vd_f[j] * + vd_n[k] * vd_w[l]; + output_t(b, j, k, l, i) = out_t; + } } } } @@ -291,7 +346,7 @@ template static void NearestNeighborInterpolateGrad( const Tensor& output_grad, Tensor* input_grad, const float ratio_h, const float ratio_w, const int n, const int c, const int out_h, - const int out_w, const bool align_corners) { + const int out_w, const bool align_corners, const DataLayout data_layout) { auto input_grad_t = EigenTensor::From(*input_grad); auto output_grad_t = 
EigenTensor::From(output_grad); @@ -305,7 +360,11 @@ static void NearestNeighborInterpolateGrad( for (int i = 0; i < n; i++) { // loop for batches for (int j = 0; j < c; j++) { // loop for channels - input_grad_t(i, j, in_k, in_l) += output_grad_t(i, j, k, l); + if (data_layout == DataLayout::kNCHW) { + input_grad_t(i, j, in_k, in_l) += output_grad_t(i, j, k, l); + } else { + input_grad_t(i, in_k, in_l, j) += output_grad_t(i, k, l, j); + } } } } @@ -313,13 +372,11 @@ static void NearestNeighborInterpolateGrad( } template -static void BilinearInterpolationGrad(const Tensor& output_grad, - Tensor* input_grad, const float ratio_h, - const float ratio_w, const int in_h, - const int in_w, const int n, const int c, - const int out_h, const int out_w, - const bool align_corners, - const int align_mode) { +static void BilinearInterpolationGrad( + const Tensor& output_grad, Tensor* input_grad, const float ratio_h, + const float ratio_w, const int in_h, const int in_w, const int n, + const int c, const int out_h, const int out_w, const bool align_corners, + const int align_mode, const DataLayout data_layout) { auto input_grad_t = EigenTensor::From(*input_grad); auto output_grad_t = EigenTensor::From(output_grad); bool align_flag = (align_mode == 0 && !align_corners); @@ -346,11 +403,19 @@ static void BilinearInterpolationGrad(const Tensor& output_grad, for (int i = 0; i < n; i++) { // loop for batches for (int j = 0; j < c; j++) { // loop for channels // bilinear interpolation grad - const T grad = output_grad_t(i, j, k, l); - input_grad_t(i, j, y_n, x_w) += static_cast(grad * d_s * d_e); - input_grad_t(i, j, y_s, x_w) += static_cast(grad * d_n * d_e); - input_grad_t(i, j, y_n, x_e) += static_cast(grad * d_s * d_w); - input_grad_t(i, j, y_s, x_e) += static_cast(grad * d_n * d_w); + if (data_layout == DataLayout::kNCHW) { + const T grad = output_grad_t(i, j, k, l); + input_grad_t(i, j, y_n, x_w) += static_cast(grad * d_s * d_e); + input_grad_t(i, j, y_s, x_w) += 
static_cast(grad * d_n * d_e); + input_grad_t(i, j, y_n, x_e) += static_cast(grad * d_s * d_w); + input_grad_t(i, j, y_s, x_e) += static_cast(grad * d_n * d_w); + } else { + const T grad = output_grad_t(i, k, l, j); + input_grad_t(i, y_n, x_w, j) += static_cast(grad * d_s * d_e); + input_grad_t(i, y_s, x_w, j) += static_cast(grad * d_n * d_e); + input_grad_t(i, y_n, x_e, j) += static_cast(grad * d_s * d_w); + input_grad_t(i, y_s, x_e, j) += static_cast(grad * d_n * d_w); + } } } } @@ -362,7 +427,8 @@ static void TrilinearInterpolationGrad( const Tensor& output_grad, Tensor* input_grad, const float ratio_d, const float ratio_h, const float ratio_w, const int in_d, const int in_h, const int in_w, const int n, const int c, const int out_d, const int out_h, - const int out_w, const bool align_corners, const int align_mode) { + const int out_w, const bool align_corners, const int align_mode, + const DataLayout data_layout) { auto input_grad_t = EigenTensor::From(*input_grad); auto output_grad_t = EigenTensor::From(output_grad); bool align_flag = (align_mode == 0 && !align_corners); @@ -399,23 +465,43 @@ static void TrilinearInterpolationGrad( for (int b = 0; b < n; b++) { // loop for batches for (int i = 0; i < c; i++) { // loop for channels // trilinear interpolation grad - const T grad = output_grad_t(b, i, j, k, l); - input_grad_t(b, i, t_f, y_n, x_w) += - static_cast(grad * d_b * d_s * d_e); - input_grad_t(b, i, t_f, y_n, x_e) += - static_cast(grad * d_b * d_s * d_w); - input_grad_t(b, i, t_f, y_s, x_w) += - static_cast(grad * d_b * d_n * d_e); - input_grad_t(b, i, t_f, y_s, x_e) += - static_cast(grad * d_b * d_n * d_w); - input_grad_t(b, i, t_b, y_n, x_w) += - static_cast(grad * d_f * d_s * d_e); - input_grad_t(b, i, t_b, y_n, x_e) += - static_cast(grad * d_f * d_s * d_w); - input_grad_t(b, i, t_b, y_s, x_w) += - static_cast(grad * d_f * d_n * d_e); - input_grad_t(b, i, t_b, y_s, x_e) += - static_cast(grad * d_f * d_n * d_w); + if (data_layout == DataLayout::kNCHW) 
{ + const T grad = output_grad_t(b, i, j, k, l); + input_grad_t(b, i, t_f, y_n, x_w) += + static_cast(grad * d_b * d_s * d_e); + input_grad_t(b, i, t_f, y_n, x_e) += + static_cast(grad * d_b * d_s * d_w); + input_grad_t(b, i, t_f, y_s, x_w) += + static_cast(grad * d_b * d_n * d_e); + input_grad_t(b, i, t_f, y_s, x_e) += + static_cast(grad * d_b * d_n * d_w); + input_grad_t(b, i, t_b, y_n, x_w) += + static_cast(grad * d_f * d_s * d_e); + input_grad_t(b, i, t_b, y_n, x_e) += + static_cast(grad * d_f * d_s * d_w); + input_grad_t(b, i, t_b, y_s, x_w) += + static_cast(grad * d_f * d_n * d_e); + input_grad_t(b, i, t_b, y_s, x_e) += + static_cast(grad * d_f * d_n * d_w); + } else { + const T grad = output_grad_t(b, j, k, l, i); + input_grad_t(b, t_f, y_n, x_w, i) += + static_cast(grad * d_b * d_s * d_e); + input_grad_t(b, t_f, y_n, x_e, i) += + static_cast(grad * d_b * d_s * d_w); + input_grad_t(b, t_f, y_s, x_w, i) += + static_cast(grad * d_b * d_n * d_e); + input_grad_t(b, t_f, y_s, x_e, i) += + static_cast(grad * d_b * d_n * d_w); + input_grad_t(b, t_b, y_n, x_w, i) += + static_cast(grad * d_f * d_s * d_e); + input_grad_t(b, t_b, y_n, x_e, i) += + static_cast(grad * d_f * d_s * d_w); + input_grad_t(b, t_b, y_s, x_w, i) += + static_cast(grad * d_f * d_n * d_e); + input_grad_t(b, t_b, y_s, x_e, i) += + static_cast(grad * d_f * d_n * d_w); + } } } } @@ -426,10 +512,10 @@ static void TrilinearInterpolationGrad( template static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, const Tensor& input, Tensor* output) { - const int n = input.dims()[0]; - const int c = input.dims()[1]; - const int in_h = input.dims()[2]; - const int in_w = input.dims()[3]; + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); + int n, c, in_d, in_h, in_w; + ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); auto interp_method = ctx.Attr("interp_method"); bool align_corners = 
ctx.Attr("align_corners"); @@ -470,7 +556,13 @@ static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, PADDLE_ENFORCE_GT( out_w, 0, "out_w in Attr(out_shape) of Op(interpolate) should be greater than 0."); - output->mutable_data({n, c, out_h, out_w}, ctx.GetPlace()); + framework::DDim dim_out; + if (data_layout == DataLayout::kNCHW) { + dim_out = {n, c, out_h, out_w}; + } else { + dim_out = {n, out_h, out_w, c}; + } + output->mutable_data(dim_out, ctx.GetPlace()); if (in_h == out_h && in_w == out_w) { framework::TensorCopy(input, ctx.GetPlace(), output); @@ -490,21 +582,21 @@ static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, if ("bilinear" == interp_method) { BilinearInterpolation(input, output, ratio_h, ratio_w, in_h, in_w, n, c, - out_h, out_w, align_corners, align_mode); + out_h, out_w, align_corners, align_mode, + data_layout); } else if ("nearest" == interp_method) { NearestNeighborInterpolate(input, output, ratio_h, ratio_w, n, c, out_h, - out_w, align_corners); + out_w, align_corners, data_layout); } } template static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, const Tensor& input, Tensor* output) { - const int n = input.dims()[0]; - const int c = input.dims()[1]; - const int in_d = input.dims()[2]; - const int in_h = input.dims()[3]; - const int in_w = input.dims()[4]; + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); + int n, c, in_d, in_h, in_w; + ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); auto interp_method = ctx.Attr("interp_method"); bool align_corners = ctx.Attr("align_corners"); @@ -552,7 +644,15 @@ static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, PADDLE_ENFORCE_GT( out_w, 0, "out_w in Attr(out_shape) of Op(interpolate) should be greater than 0."); - output->mutable_data({n, c, out_d, out_h, out_w}, ctx.GetPlace()); + + framework::DDim dim_out; + if 
(data_layout == DataLayout::kNCHW) { + dim_out = {n, c, out_d, out_h, out_w}; + } else { + dim_out = {n, out_d, out_h, out_w, c}; + } + + output->mutable_data(dim_out, ctx.GetPlace()); if (in_d == out_d && in_h == out_h && in_w == out_w) { framework::TensorCopy(input, ctx.GetPlace(), output); @@ -578,7 +678,7 @@ static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, if ("trilinear" == interp_method) { TrilinearInterpolation(input, output, ratio_d, ratio_h, ratio_w, in_d, in_h, in_w, n, c, out_d, out_h, out_w, - align_corners, align_mode); + align_corners, align_mode, data_layout); } } @@ -586,10 +686,10 @@ template static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, Tensor* input_grad, const Tensor& output_grad) { auto* input = ctx.Input("X"); - const int n = input->dims()[0]; - const int c = input->dims()[1]; - const int in_h = input->dims()[2]; - const int in_w = input->dims()[3]; + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); + int n, c, in_d, in_h, in_w; + ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); auto interp_method = ctx.Attr("interp_method"); bool align_corners = ctx.Attr("align_corners"); @@ -623,7 +723,14 @@ static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, out_w = new_size[1]; } - input_grad->mutable_data({n, c, in_h, in_w}, ctx.GetPlace()); + framework::DDim dim_grad; + if (data_layout == DataLayout::kNCHW) { + dim_grad = {n, c, in_h, in_w}; + } else { + dim_grad = {n, in_h, in_w, c}; + } + input_grad->mutable_data(dim_grad, ctx.GetPlace()); + auto& device_ctx = ctx.template device_context(); math::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); @@ -647,10 +754,11 @@ static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, if ("bilinear" == interp_method) { BilinearInterpolationGrad(output_grad, input_grad, ratio_h, ratio_w, in_h, in_w, n, 
c, out_h, out_w, align_corners, - align_mode); + align_mode, data_layout); } else if ("nearest" == interp_method) { NearestNeighborInterpolateGrad(output_grad, input_grad, ratio_h, ratio_w, - n, c, out_h, out_w, align_corners); + n, c, out_h, out_w, align_corners, + data_layout); } } @@ -658,11 +766,10 @@ template static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, Tensor* input_grad, const Tensor output_grad) { auto* input = ctx.Input("X"); - const int n = input->dims()[0]; - const int c = input->dims()[1]; - const int in_d = input->dims()[2]; - const int in_h = input->dims()[3]; - const int in_w = input->dims()[4]; + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); + int n, c, in_d, in_h, in_w; + ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); auto interp_method = ctx.Attr("interp_method"); bool align_corners = ctx.Attr("align_corners"); @@ -700,7 +807,13 @@ static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, out_w = new_size[2]; } - input_grad->mutable_data({n, c, in_d, in_h, in_w}, ctx.GetPlace()); + framework::DDim dim_grad; + if (data_layout == DataLayout::kNCHW) { + dim_grad = {n, c, in_d, in_h, in_w}; + } else { + dim_grad = {n, in_d, in_h, in_w, c}; + } + input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); math::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); @@ -727,9 +840,9 @@ static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, } if ("trilinear" == interp_method) { - TrilinearInterpolationGrad(output_grad, input_grad, ratio_d, ratio_h, - ratio_w, in_d, in_h, in_w, n, c, out_d, out_h, - out_w, align_corners, align_mode); + TrilinearInterpolationGrad( + output_grad, input_grad, ratio_d, ratio_h, ratio_w, in_d, in_h, in_w, n, + c, out_d, out_h, out_w, align_corners, align_mode, data_layout); } } diff --git 
a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 5c2df7a7cb..eeba899564 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -8019,15 +8019,17 @@ def image_resize(input, resample='BILINEAR', actual_shape=None, align_corners=True, - align_mode=1): + align_mode=1, + data_format='NCHW'): """ **Resize a Batch of Images** - The input must be a tensor of the shape (num_batches, channels, in_h, in_w) - or (num_batches, channels, in_d, in_h, in_w), and the resizing only applies - on the last two/three dimensions(depth, hight and width). + The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w) + or (num_batches, in_h, in_w, channels), or a 5-D Tensor of the shape + (num_batches, channels, in_d, in_h, in_w) or (num_batches, in_d, in_h, in_w, channels), + and the resizing only applies on the two/three spatial dimensions (depth, height and width). - **Warning:** the parameter :attr:`actual_shape` will be deprecated in the + **Warning:** the parameter :attr:`actual_shape` will be deprecated in the future and only use :attr:`out_shape` instead. Supporting resample methods: @@ -8144,16 +8146,13 @@ def image_resize(input, Args: - input (Variable): The input tensor of image resize layer, - This is a 4-D tensor of the shape - (num_batches, channels, in_h, in_w) or a - 5-D tensor of the shape - (num_batches, channls, in_d, in_h, in_w). + input (Variable): 4-D or 5-D Tensor, its data type is float32, float64, or uint8, + its data format is specified by :attr:`data_format`. out_shape(list|tuple|Variable|None): Output shape of image resize - layer, the shape is (out_h, out_w) when input is a 4-D tensor and is - (out_d, out_h, out_w) when input is a 5-D tensor. Default: None. If - a list, each element can be an integer or a tensor Variable of shape: [1]. - If a tesnosr Variable, its dimensions size should be a 1. 
+ layer, the shape is (out_h, out_w) when input is a 4-D Tensor and is + (out_d, out_h, out_w) when input is a 5-D Tensor. Default: None. If + a list, each element can be an integer or a Tensor Variable of shape: [1]. + If a Tensor Variable, its dimensions size should be a 1. scale(float|Variable|None): The multiplier for the input height or width. At least one of :attr:`out_shape` or :attr:`scale` must be set. And :attr:`out_shape` has a higher priority than :attr:`scale`. @@ -8181,12 +8180,16 @@ def image_resize(input, Default: True align_mode(int) : An optional for bilinear interpolation. can be \'0\' for src_idx = scale*(dst_indx+0.5)-0.5 , can be \'1\' for - src_idx = scale*dst_index . + src_idx = scale*dst_index. + data_format(str, optional): NCHW(num_batches, channels, height, width) or + NHWC(num_batches, height, width, channels) for 4-D Tensor, + NCDHW(num_batches, channels, depth, height, width) or + NDHWC(num_batches, depth, height, width, channels) for 5-D Tensor. + Default: 'NCHW'. Returns: - Variable: The output is a 4-D tensor of the shape - (num_batches, channls, out_h, out_w) or a 5-D tensor of the shape - (num_batches, channels, out_d, out_h, out_w). + A 4-D Tensor of the shape (num_batches, channels, out_h, out_w) or (num_batches, out_h, out_w, channels), + or 5-D Tensor of the shape (num_batches, channels, out_d, out_h, out_w) or (num_batches, out_d, out_h, out_w, channels). Raises: TypeError: out_shape should be a list or tuple or Variable. @@ -8201,6 +8204,7 @@ def image_resize(input, ValueError: scale should be greater than zero. TypeError: align_corners shoule be a bool value ValueError: align_mode can only be '0' or '1' + ValueError: data_format can only be 'NCHW', 'NHWC', 'NCDHW' or 'NDHWC'. Examples: .. 
code-block:: python @@ -8259,9 +8263,23 @@ def image_resize(input, helper = LayerHelper('{}_interp'.format(resample_type), **locals()) dtype = helper.input_dtype() + if len(input.shape) == 4 and data_format not in ['NCHW', 'NHWC']: + raise ValueError( + "Got wrong value for param `data_format`: " + data_format + + " received but only `NCHW` or `NHWC` supported for 4-D input.") + elif len(input.shape) == 5 and data_format not in ['NCDHW', 'NDHWC']: + raise ValueError( + "Got wrong value for param `data_format`: " + data_format + + " received but only `NCDHW` or `NDHWC` supported for 5-D input.") + def _is_list_or_turple_(data): return (isinstance(data, list) or isinstance(data, tuple)) + if data_format == 'NCHW' or data_format == 'NCDHW': + data_layout = 'NCHW' + if data_format == 'NHWC' or data_format == 'NDHWC': + data_layout = 'NHWC' + inputs = {"X": input} attrs = { "out_d": -1, @@ -8269,7 +8287,8 @@ def image_resize(input, "out_w": -1, "interp_method": resample_type, "align_corners": align_corners, - "align_mode": align_mode + "align_mode": align_mode, + "data_layout": data_layout } if out_shape is not None: @@ -8368,7 +8387,8 @@ def resize_bilinear(input, name=None, actual_shape=None, align_corners=True, - align_mode=1): + align_mode=1, + data_format='NCHW'): """ Resize input by performing bilinear interpolation based on given output shape which specified by actual_shape, out_shape and scale @@ -8401,7 +8421,7 @@ def resize_bilinear(input, else: - scale_factor = float(in_size/out_size) + scale_factor = float(in_size/out_size) Bilinear interpolation: @@ -8414,31 +8434,24 @@ def resize_bilinear(input, H_out = (H_{in}+0.5) * scale_{factor} - 0.5 W_out = (W_{in}+0.5) * scale_{factor} - 0.5 - else: input : (N,C,H_in,W_in) output: (N,C,H_out,W_out) where: - H_out = H_{in} * scale_{factor} W_out = W_{in} * scale_{factor} - - Args: - input(${x_type}): input should be a 4-D tensor of shape - (num_batches, channels, in_h, in_w). 
- + input(${x_type}): 4-D Tensor, its data type is float32, float64, or uint8, + its data format is specified by :attr:`data_format`. out_shape(list|tuple|Variable|None): Output shape of resize bilinear layer, the shape is (out_h, out_w).Default: None. If a list, each - element can be an integer or a tensor Variable with shape: [1]. If a - tensor Variable, its dimension size should be 1. - + element can be an integer or a Tensor Variable with shape: [1]. If a + Tensor Variable, its dimension size should be 1. scale(float|Variable|None): The multiplier for the input height or width. At least one of :attr:`out_shape` or :attr:`scale` must be set. And :attr:`out_shape` has a higher priority than :attr:`scale`. Default: None. - name(str|None): The output variable name. actual_shape(Variable): An optional input to specify output shape dynamically. If provided, image resize @@ -8455,9 +8468,12 @@ def resize_bilinear(input, Default: None align_corners(bool): ${align_corners_comment} align_mode(bool): ${align_mode_comment} + data_format(str, optional): NCHW(num_batches, channels, height, width) or + NHWC(num_batches, height, width, channels). Default: 'NCHW'. Returns: - A 4-D tensor in shape of (num_batches, channels, out_h, out_w) + A 4-D Tensor in shape of (num_batches, channels, out_h, out_w) or + (num_batches, out_h, out_w, channels). Examples: .. 
code-block:: python @@ -8491,7 +8507,7 @@ def resize_bilinear(input, """ return image_resize(input, out_shape, scale, name, 'BILINEAR', actual_shape, - align_corners, align_mode) + align_corners, align_mode, data_format) @templatedoc(op_type="trilinear_interp") @@ -8501,7 +8517,8 @@ def resize_trilinear(input, name=None, actual_shape=None, align_corners=True, - align_mode=1): + align_mode=1, + data_format='NCDHW'): """ Resize input by performing trilinear interpolation based on given output shape which specified by actual_shape, out_shape and scale @@ -8538,6 +8555,7 @@ def resize_trilinear(input, Bilinear interpolation: if: + align_corners = False , align_mode = 0 input : (N,C,D_in,H_in,W_in) @@ -8547,7 +8565,6 @@ def resize_trilinear(input, H_out = (H_{in}+0.5) * scale_{factor} - 0.5 W_out = (W_{in}+0.5) * scale_{factor} - 0.5 - else: input : (N,C,D_in,H_in,W_in) @@ -8557,22 +8574,17 @@ def resize_trilinear(input, H_out = H_{in} * scale_{factor} W_out = W_{in} * scale_{factor} - - Args: - input(${x_type}): input should be a 5-D tensor of shape - (num_batches, channls, in_d, in_h, in_w). - + input(${x_type}): 5-D Tensor, its data type is float32, float64, or uint8, + its data format is specified by :attr:`data_format`. out_shape(list|tuple|Variable|None): Output shape of resize bilinear layer, the shape is (out_d, out_h, out_w). Default: None. If a list, - each element can be an integer or a tensor Variable with shape: [1]. If - a tensor Variable, its dimension size should be 1. - + each element can be an integer or a Tensor Variable with shape: [1]. If + a Tensor Variable, its dimension size should be 1. scale(float|Variable|None): The multiplier for the input depth, height or width. At least one of :attr:`out_shape` or :attr:`scale` must be set. And :attr:`out_shape` has a higher priority than :attr:`scale`. Default: None. - name(str|None): The output variable name. actual_shape(Variable): An optional input to specify output shape dynamically. 
If provided, image resize @@ -8589,9 +8601,13 @@ def resize_trilinear(input, Default: None align_corners(bool): ${align_corners_comment} align_mode(bool): ${align_mode_comment} + data_format(str, optional): NCDHW(num_batches, channels, depth, height, width) or + NDHWC(num_batches, depth, height, width, channels). + Default: 'NCDHW'. Returns: - A 5-D tensor in shape (num_batches, channels, out_d, out_h, out_w) + A 5-D Tensor in shape of (num_batches, channels, out_d, out_h, out_w) or + (num_batches, out_d, out_h, out_w, channels). Examples: .. code-block:: python @@ -8622,11 +8638,10 @@ def resize_trilinear(input, scale_tensor = fluid.layers.data(name="scale", shape=[1], dtype="float32", append_batch_size=False) out4 = fluid.layers.resize_trilinear(input, scale=scale_tensor) # out4.shape = [-1, 3, -1, -1, -1] - """ return image_resize(input, out_shape, scale, name, 'TRILINEAR', - actual_shape, align_corners, align_mode) + actual_shape, align_corners, align_mode, data_format) @templatedoc(op_type="nearest_interp") @@ -8635,12 +8650,12 @@ def resize_nearest(input, scale=None, name=None, actual_shape=None, - align_corners=True): + align_corners=True, + data_format='NCHW'): """ Resize input by performing nearest neighbor interpolation in both the - 3rd dimension(in height direction) and the 4th dimension(in width - direction) based on given output shape which is specified by actual_shape, - out_shape and scale in priority order. + height direction and the width direction based on given output shape + which is specified by actual_shape, out_shape and scale in priority order. **Warning:** the parameter :attr:`actual_shape` will be deprecated in the future and only use :attr:`out_shape` instead. 
@@ -8652,13 +8667,11 @@ def resize_nearest(input, For scale: if align_corners = True && out_size > 1 : - scale_factor = (in_size-1.0)/(out_size-1.0) else: scale_factor = float(in_size/out_size) - Nearest neighbor interpolation: @@ -8685,19 +8698,16 @@ def resize_nearest(input, https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation Args: - input(${x_type}): input should be a 4-D tensor of shape - (num_batches, channls, in_h, in_w). - + input(${x_type}): 4-D Tensor, its data type is float32, float64, or uint8, + its data format is specified by :attr:`data_format`. out_shape(list|tuple|Variable|None): Output shape of resize nearest layer, the shape is (out_h, out_w). Default: None. If a list, each element can be integer or a tensor Variable with shape: [1]. If a tensor Variable, its dimension size should be 1. - scale(float|Variable|None): The multiplier for the input height or width. At least one of :attr:`out_shape` or :attr:`scale` must be set. And :attr:`out_shape` has a higher priority than :attr:`scale`. Default: None. - name(str|None): The output variable name. actual_shape(Variable): An optional input to specify output shape dynamically. If provided, image resize @@ -8713,9 +8723,13 @@ def resize_nearest(input, errors would be occured in graph constructing stage. Default: None align_corners(bool): ${align_corners_comment} + data_format(str, optional): NCHW(num_batches, channels, height, width) or + NHWC(num_batches, height, width, channels). + Default: 'NCHW'. Returns: - A 4-D tensor in shape of (num_batches, channels, out_h, out_w) + A 4-D Tensor in shape of (num_batches, channels, out_h, out_w) or + (num_batches, out_h, out_w, channels). Examples: .. 
code-block:: python @@ -8746,11 +8760,18 @@ def resize_nearest(input, scale_tensor = fluid.layers.data(name="scale", shape=[1], dtype="float32", append_batch_size=False) out4 = fluid.layers.resize_nearest(input, scale=scale_tensor) # out4.shape = [-1, 3, -1, -1] - """ - return image_resize(input, out_shape, scale, name, 'NEAREST', actual_shape, - align_corners) + return image_resize( + input, + out_shape, + scale, + name, + 'NEAREST', + actual_shape, + align_corners, + align_mode=1, + data_format=data_format) def image_resize_short(input, out_short_len, resample='BILINEAR'): diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py index 068a83f4a6..f77fe90c4c 100644 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py @@ -27,8 +27,11 @@ def bilinear_interp_np(input, out_size=None, actual_shape=None, align_corners=True, - align_mode=0): + align_mode=0, + data_layout='NCHW'): """bilinear interpolation implement in shape [N, C, H, W]""" + if data_layout == "NHWC": + input = np.transpose(input, (0, 3, 1, 2)) # NHWC => NCHW if out_size is not None: out_h = out_size[0] out_w = out_size[1] @@ -83,6 +86,10 @@ def bilinear_interp_np(input, w1lambda*input[:, :, h, w+wid]) + \ h1lambda*(w2lambda*input[:, :, h+hid, w] + w1lambda*input[:, :, h+hid, w+wid]) + + if data_layout == "NHWC": + out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC + return out.astype(input.dtype) @@ -90,20 +97,28 @@ class TestBilinearInterpOp(OpTest): def setUp(self): self.out_size = None self.actual_shape = None + self.data_layout = 'NCHW' self.init_test_case() self.op_type = "bilinear_interp" input_np = np.random.random(self.input_shape).astype("float32") + if self.data_layout == "NCHW": + in_h = self.input_shape[2] + in_w = self.input_shape[3] + else: + in_h = self.input_shape[1] + in_w = self.input_shape[2] + if self.scale > 0: - 
out_h = int(self.input_shape[2] * self.scale) - out_w = int(self.input_shape[3] * self.scale) + out_h = int(in_h * self.scale) + out_w = int(in_w * self.scale) else: out_h = self.out_h out_w = self.out_w output_np = bilinear_interp_np(input_np, out_h, out_w, self.out_size, self.actual_shape, self.align_corners, - self.align_mode) + self.align_mode, self.data_layout) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -116,7 +131,8 @@ class TestBilinearInterpOp(OpTest): 'scale': self.scale, 'interp_method': self.interp_method, 'align_corners': self.align_corners, - 'align_mode': self.align_mode + 'align_mode': self.align_mode, + 'data_layout': self.data_layout } self.outputs = {'Out': output_np} @@ -229,6 +245,19 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp): self.align_mode = 1 +class TestBilinearInterpDataLayout(TestBilinearInterpOp): + def init_test_case(self): + self.interp_method = 'bilinear' + self.input_shape = [2, 4, 4, 3] + self.out_h = 2 + self.out_w = 2 + self.scale = 0. 
+ self.out_size = np.array([3, 3]).astype("int32") + self.align_corners = True + self.align_mode = 1 + self.data_layout = "NHWC" + + class TestBilinearInterpOpUint8(OpTest): def setUp(self): self.out_size = None diff --git a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py index e3240f4c8c..6f0d85ed3c 100644 --- a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py @@ -26,8 +26,11 @@ def nearest_neighbor_interp_np(X, out_w, out_size=None, actual_shape=None, - align_corners=True): + align_corners=True, + data_layout='NCHW'): """nearest neighbor interpolation implement in shape [N, C, H, W]""" + if data_layout == "NHWC": + X = np.transpose(X, (0, 3, 1, 2)) # NHWC => NCHW if out_size is not None: out_h = out_size[0] out_w = out_size[1] @@ -63,6 +66,9 @@ def nearest_neighbor_interp_np(X, in_j = int(ratio_w * j) out[:, :, i, j] = X[:, :, in_i, in_j] + if data_layout == "NHWC": + out = np.transpose(out, (0, 2, 3, 1)) # NCHW => NHWC + return out.astype(X.dtype) @@ -70,20 +76,28 @@ class TestNearestInterpOp(OpTest): def setUp(self): self.out_size = None self.actual_shape = None + self.data_layout = 'NCHW' self.init_test_case() self.op_type = "nearest_interp" input_np = np.random.random(self.input_shape).astype("float32") + if self.data_layout == "NCHW": + in_h = self.input_shape[2] + in_w = self.input_shape[3] + else: + in_h = self.input_shape[1] + in_w = self.input_shape[2] + if self.scale > 0: - out_h = int(self.input_shape[2] * self.scale) - out_w = int(self.input_shape[3] * self.scale) + out_h = int(in_h * self.scale) + out_w = int(in_w * self.scale) else: out_h = self.out_h out_w = self.out_w - output_np = nearest_neighbor_interp_np(input_np, out_h, out_w, - self.out_size, self.actual_shape, - self.align_corners) + output_np = nearest_neighbor_interp_np( + input_np, out_h, out_w, self.out_size, self.actual_shape, + 
self.align_corners, self.data_layout) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -95,6 +109,7 @@ class TestNearestInterpOp(OpTest): 'scale': self.scale, 'interp_method': self.interp_method, 'align_corners': self.align_corners, + 'data_layout': self.data_layout } self.outputs = {'Out': output_np} @@ -198,6 +213,18 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp): self.align_corners = True +class TestNearestNeighborInterpDataLayout(TestNearestInterpOp): + def init_test_case(self): + self.interp_method = 'nearest' + self.input_shape = [2, 4, 4, 5] + self.out_h = 2 + self.out_w = 2 + self.scale = 0. + self.out_size = np.array([3, 8]).astype("int32") + self.align_corners = True + self.data_layout = "NHWC" + + class TestNearestInterpOpUint8(OpTest): def setUp(self): self.out_size = None @@ -399,6 +426,7 @@ class TestNearestInterp_attr_tensor_Case3(TestNearestInterpOp_attr_tensor): class TestNearestAPI(OpTest): def test_case(self): x = fluid.layers.data(name="x", shape=[3, 6, 6], dtype="float32") + y = fluid.layers.data(name="y", shape=[6, 6, 3], dtype="float32") dim = fluid.layers.data( name="dim", shape=[1], dtype="int32", append_batch_size=False) @@ -418,7 +446,8 @@ class TestNearestAPI(OpTest): dtype="float32", append_batch_size=False) - out1 = fluid.layers.resize_nearest(x, out_shape=[12, 12]) + out1 = fluid.layers.resize_nearest( + y, out_shape=[12, 12], data_format='NHWC') out2 = fluid.layers.resize_nearest(x, out_shape=[12, dim]) out3 = fluid.layers.resize_nearest(x, out_shape=shape_tensor) out4 = fluid.layers.resize_nearest( @@ -436,6 +465,7 @@ class TestNearestAPI(OpTest): results = exe.run(fluid.default_main_program(), feed={ "x": x_data, + "y": np.transpose(x_data, (0, 2, 3, 1)), "dim": dim_data, "shape_tensor": shape_data, "actual_size": actual_size_data, @@ -446,8 +476,20 @@ class TestNearestAPI(OpTest): expect_res = nearest_neighbor_interp_np( x_data, out_h=12, out_w=12, 
align_corners=True) - for res in results: - self.assertTrue(np.allclose(res, expect_res)) + self.assertTrue( + np.allclose(results[0], np.transpose(expect_res, (0, 2, 3, 1)))) + for i in range(len(results) - 1): + self.assertTrue(np.allclose(results[i + 1], expect_res)) + + def test_exception(self): + # for 4-D input, data_format can only be NCHW or NHWC + input = fluid.layers.data( + name="input", shape=[3, 6, 6], dtype="float32") + try: + out = fluid.layers.resize_nearest( + input, out_shape=[4, 8], data_format='NDHWC') + except: + pass if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py index 1f41883456..dcd4c71527 100644 --- a/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_trilinear_interp_op.py @@ -28,8 +28,11 @@ def trilinear_interp_np(input, out_size=None, actual_shape=None, align_corners=True, - align_mode=0): + align_mode=0, + data_layout='NCDHW'): """trilinear interpolation implement in shape [N, C, D, H, W]""" + if data_layout == "NDHWC": + input = np.transpose(input, (0, 4, 1, 2, 3)) # NDHWC => NCDHW if out_size is not None: out_d = out_size[0] out_h = out_size[1] @@ -114,6 +117,9 @@ def trilinear_interp_np(input, w1lambda * input[:, :, d+did, h, w+wid]) + \ h1lambda * (w2lambda * input[:, :, d+did, h+hid, w] + \ w1lambda * input[:, :, d+did, h+hid, w+wid])) + if data_layout == "NDHWC": + out = np.transpose(out, (0, 2, 3, 4, 1)) # NCDHW => NDHWC + return out.astype(input.dtype) @@ -121,28 +127,42 @@ class TestTrilinearInterpOp(OpTest): def setUp(self): self.out_size = None self.actual_shape = None + self.data_layout = 'NCDHW' self.init_test_case() self.op_type = "trilinear_interp" input_np = np.random.random(self.input_shape).astype("float32") + if self.data_layout == "NCDHW": + in_d = self.input_shape[2] + in_h = self.input_shape[3] + in_w = self.input_shape[4] + else: + 
in_d = self.input_shape[1] + in_h = self.input_shape[2] + in_w = self.input_shape[3] + if self.scale > 0: - out_d = int(self.input_shape[2] * self.scale) - out_h = int(self.input_shape[3] * self.scale) - out_w = int(self.input_shape[4] * self.scale) + out_d = int(in_d * self.scale) + out_h = int(in_h * self.scale) + out_w = int(in_w * self.scale) else: out_d = self.out_d out_h = self.out_h out_w = self.out_w - output_np = trilinear_interp_np(input_np, out_d, out_h, out_w, - self.out_size, self.actual_shape, - self.align_corners, self.align_mode) + output_np = trilinear_interp_np( + input_np, out_d, out_h, out_w, self.out_size, self.actual_shape, + self.align_corners, self.align_mode, self.data_layout) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size if self.actual_shape is not None: self.inputs['OutSize'] = self.actual_shape - + # c++ end treat NCDHW the same way as NCHW + if self.data_layout == 'NCDHW': + data_layout = 'NCHW' + else: + data_layout = 'NHWC' self.attrs = { 'out_d': self.out_d, 'out_h': self.out_h, @@ -150,7 +170,8 @@ class TestTrilinearInterpOp(OpTest): 'scale': self.scale, 'interp_method': self.interp_method, 'align_corners': self.align_corners, - 'align_mode': self.align_mode + 'align_mode': self.align_mode, + 'data_layout': data_layout } self.outputs = {'Out': output_np} @@ -284,6 +305,20 @@ class TestTrilinearInterpActualShape(TestTrilinearInterpOp): self.align_mode = 1 +class TestTrilinearInterpDatalayout(TestTrilinearInterpOp): + def init_test_case(self): + self.interp_method = 'trilinear' + self.input_shape = [2, 4, 4, 4, 3] + self.out_d = 2 + self.out_h = 2 + self.out_w = 2 + self.scale = 0. 
+ self.out_size = np.array([3, 3, 3]).astype("int32") + self.align_corners = True + self.align_mode = 1 + self.data_layout = "NDHWC" + + class TestTrilinearInterpOpUint8(OpTest): def setUp(self): self.out_size = None @@ -536,6 +571,7 @@ class TestTrilinearInterp_attr_tensor_Case3(TestTrilinearInterpOp_attr_tensor): class TestTrilinearInterpAPI(OpTest): def test_case(self): x = fluid.layers.data(name="x", shape=[3, 6, 9, 4], dtype="float32") + y = fluid.layers.data(name="y", shape=[6, 9, 4, 3], dtype="float32") dim = fluid.layers.data(name="dim", shape=[1], dtype="int32") shape_tensor = fluid.layers.data( @@ -554,7 +590,8 @@ class TestTrilinearInterpAPI(OpTest): dtype="float32", append_batch_size=False) - out1 = fluid.layers.resize_trilinear(x, out_shape=[12, 18, 8]) + out1 = fluid.layers.resize_trilinear( + y, out_shape=[12, 18, 8], data_format='NDHWC') out2 = fluid.layers.resize_trilinear(x, out_shape=[12, dim, 8]) out3 = fluid.layers.resize_trilinear(x, out_shape=shape_tensor) out4 = fluid.layers.resize_trilinear( @@ -572,6 +609,7 @@ class TestTrilinearInterpAPI(OpTest): results = exe.run(fluid.default_main_program(), feed={ "x": x_data, + "y": np.transpose(x_data, (0, 2, 3, 4, 1)), "dim": dim_data, "shape_tensor": shape_data, "actual_size": actual_size_data, @@ -582,8 +620,20 @@ class TestTrilinearInterpAPI(OpTest): expect_res = trilinear_interp_np( x_data, out_d=12, out_h=18, out_w=8, align_mode=1) - for res in results: - self.assertTrue(np.allclose(res, expect_res)) + self.assertTrue( + np.allclose(results[0], np.transpose(expect_res, (0, 2, 3, 4, 1)))) + for i in range(len(results) - 1): + self.assertTrue(np.allclose(results[i + 1], expect_res)) + + def test_exception(self): + input = fluid.layers.data( + name="input", shape=[3, 6, 9, 4], dtype="float32") + try: + # for 5-D input, data_format only can be NCDHW or NDHWC + out = fluid.layers.resize_trilinear( + input, out_shape=[4, 8, 4], data_format='NHWC') + except: + pass if __name__ == "__main__": -- GitLab