From 1773fbba3c24cee7d885895eb4dfec416bb11fb5 Mon Sep 17 00:00:00 2001 From: duanyanhui <45005871+YanhuiDua@users.noreply.github.com> Date: Fri, 12 Aug 2022 13:50:03 +0800 Subject: [PATCH] enhance grid_sampler to support 3d input (#45015) * enhance grid_sampler to support 3d input --- paddle/fluid/operators/grid_sampler_op.cc | 12 +- paddle/phi/infermeta/binary.cc | 34 +- .../kernels/gpu/grid_sample_grad_kernel.cu | 424 ++++++++++++++++-- paddle/phi/kernels/gpu/grid_sample_kernel.cu | 267 +++++++++-- paddle/phi/kernels/gpu/grid_sample_utils.h | 5 + .../tests/unittests/test_grid_sampler_op.py | 346 ++++++++++++-- python/paddle/nn/functional/vision.py | 73 +-- 7 files changed, 1029 insertions(+), 132 deletions(-) diff --git a/paddle/fluid/operators/grid_sampler_op.cc b/paddle/fluid/operators/grid_sampler_op.cc index 42db5442894..5bfdfcdf7e4 100644 --- a/paddle/fluid/operators/grid_sampler_op.cc +++ b/paddle/fluid/operators/grid_sampler_op.cc @@ -54,13 +54,19 @@ class GridSampleOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "(Tensor) The input data of GridSampleOp, " - "This is a 4-D tensor with shape of [N, C, H, W]"); + "This is a 4-D tensor with shape of [N, C, H, W] or" + " a 5-D tensot with shape of [N, C, D, H, W]"); AddInput( "Grid", "(Tensor) The input grid of GridSampleOp generated by AffineGridOp, " "This is a 4-D tensor with shape of [N, H, W, 2] is the concatenation " - "of x and y coordinates with shape [N, H, W] in last dimension"); - AddOutput("Output", "(Tensor) Output tensor with shape [N, C, H, W]"); + "of x and y coordinates with shape [N, H, W] in last dimension or " + "a 5-D tensor with shape of [N, D, H, W, 3] is the concatenation " + "of depth, x and y coordinates with shape [N, D, H, W] in last " + "dimension "); + AddOutput("Output", + "(Tensor) Output tensor with shape [N, C, H, W] or shape [N,C, " + "D, H ,W]"); AddAttr( "use_cudnn", "(bool, default true) Only used in cudnn kernel, need install cudnn") diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc index 44e53fc32cc..46a76e3dc3f 100644 --- a/paddle/phi/infermeta/binary.cc +++ b/paddle/phi/infermeta/binary.cc @@ -1288,19 +1288,31 @@ void GridSampleBaseInferMeta(const MetaTensor& x, MetaConfig config) { auto x_dims = x.dims(); auto grid_dims = grid.dims(); - PADDLE_ENFORCE_EQ(x_dims.size(), + PADDLE_ENFORCE_GE(x_dims.size(), 4, phi::errors::InvalidArgument( "Input(X) of GridSampleOp should be 4-D Tensor, but " "received X dimension size(%d)", x_dims.size())); - PADDLE_ENFORCE_EQ(grid_dims.size(), + PADDLE_ENFORCE_LE(x_dims.size(), + 5, + phi::errors::InvalidArgument( + "Input(X) of GridSampleOp should be 4-D Tensor, but " + "received X dimension size(%d)", + x_dims.size())); + PADDLE_ENFORCE_GE(grid_dims.size(), 4, phi::errors::InvalidArgument( "Input(Grid) of GridSampleOp should be 4-D Tensor, " "but received X dimension size(%d)", grid_dims.size())); - if (config.is_runtime || grid_dims[3] > 0) { + PADDLE_ENFORCE_LE(grid_dims.size(), + 5, + phi::errors::InvalidArgument( + "Input(Grid) of GridSampleOp should be 4-D Tensor, " + "but received X dimension size(%d)", + grid_dims.size())); + if (grid_dims.size() == 4 && (config.is_runtime || grid_dims[3] > 0)) { PADDLE_ENFORCE_EQ( grid_dims[3], 2, @@ -1308,6 +1320,14 @@ void GridSampleBaseInferMeta(const MetaTensor& x, "Input(Grid) dimension[3] should be 2, but received %d", grid_dims[3])); } + if (grid_dims.size() == 5 && (config.is_runtime || grid_dims[4] > 0)) { + PADDLE_ENFORCE_EQ( + 
grid_dims[4], + 3, + phi::errors::InvalidArgument( + "Input(Grid) dimension[4] should be 3, but received %d", + grid_dims[4])); + } if (config.is_runtime) { PADDLE_ENFORCE_EQ( grid_dims[0], @@ -1318,8 +1338,12 @@ void GridSampleBaseInferMeta(const MetaTensor& x, x_dims[0], grid_dims[0])); } - - out->set_dims({x_dims[0], x_dims[1], grid_dims[1], grid_dims[2]}); + if (grid_dims.size() == 4) { + out->set_dims({x_dims[0], x_dims[1], grid_dims[1], grid_dims[2]}); + } else { + out->set_dims( + {x_dims[0], x_dims[1], grid_dims[1], grid_dims[2], grid_dims[3]}); + } out->set_dtype(x.dtype()); out->share_lod(x); } diff --git a/paddle/phi/kernels/gpu/grid_sample_grad_kernel.cu b/paddle/phi/kernels/gpu/grid_sample_grad_kernel.cu index a258e041b17..551f9c53bc0 100644 --- a/paddle/phi/kernels/gpu/grid_sample_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/grid_sample_grad_kernel.cu @@ -32,6 +32,23 @@ static __forceinline__ __device__ void AtomicAdd( } } +template +static __forceinline__ __device__ void AtomicAdd3D(T* data, + int64_t d, + int64_t h, + int64_t w, + int64_t sD, + int64_t sH, + int64_t sW, + int64_t D, + int64_t H, + int64_t W, + T delta) { + if (InBounds3D(d, h, w, D, H, W)) { + atomicAdd(data + d * sD + h * sH + w * sW, delta); + } +} + template static __forceinline__ __device__ T UnnormalizeWithMask(T coord, int size, bool align_corners, T* grad_in) { @@ -249,6 +266,305 @@ __global__ void GridSamplerCudaBackwardKernel(const int nthreads, } } +template +__global__ void GridSampler3DCudaBackwardKernel(const int nthreads, + const T* grad_output, + const T* input, + const T* grid, + int out_c, + int out_d, + int out_h, + int out_w, + int in_d, + int in_h, + int in_w, + T* grad_input, + T* grad_grid, + const Mode mode, + const PaddingMode padding_mode, + bool align_corners) { + int inp_sW = 1; + int inp_sH = in_w; + int inp_sD = in_h * in_w; + int inp_sC = in_d * inp_sD; + int inp_sN = out_c * inp_sC; + + int grid_sCoor = 1; + int grid_sW = 3; + int grid_sH = out_w * grid_sW; + int grid_sD = out_h * grid_sH; + int grid_sN = out_d * grid_sD; + + int gOut_sW = 1; + int gOut_sH = out_w; + int gOut_sD = out_h * out_w; + int gOut_sC = out_d * gOut_sD; + int gOut_sN = out_c * gOut_sC; + + CUDA_KERNEL_LOOP_TYPE(index, nthreads, int) { + const int w = index % out_w; + const int h = (index / out_w) % out_h; + const int d = (index / (out_h * out_w)) % out_d; + const int n = index / (out_d * out_h * out_w); + const auto grid_offset = + n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW; + + // get the corresponding input x, y, z co-ordinates from grid + T ix = grid[grid_offset]; + T iy = grid[grid_offset + grid_sCoor]; + T iz = grid[grid_offset + 2 * grid_sCoor]; + + // multipliers for gradients on ix, iy, and iz + T gix_mult, giy_mult, giz_mult; + ix = ComputePositionsWithMask( + ix, in_w, padding_mode, align_corners, &gix_mult); + iy = ComputePositionsWithMask( + iy, in_h, padding_mode, align_corners, &giy_mult); + iz = ComputePositionsWithMask( + iz, in_d, padding_mode, align_corners, &giz_mult); + + if (mode == Mode::bilinear) { + // get corner pixel values from (x, y, z) + // for 4d, we used north-east-south-west + // for 5d, we add top-bottom + int ix_tnw = static_cast(std::floor(ix)); + int iy_tnw = static_cast(std::floor(iy)); + int iz_tnw = static_cast(std::floor(iz)); + + int ix_tne = ix_tnw + 1; + int iy_tne = iy_tnw; + int iz_tne = iz_tnw; + + int ix_tsw = ix_tnw; + int iy_tsw = iy_tnw + 1; + int iz_tsw = iz_tnw; + + int ix_tse = ix_tnw + 1; + int iy_tse = iy_tnw + 1; + int iz_tse = iz_tnw; + 
+ int ix_bnw = ix_tnw; + int iy_bnw = iy_tnw; + int iz_bnw = iz_tnw + 1; + + int ix_bne = ix_tnw + 1; + int iy_bne = iy_tnw; + int iz_bne = iz_tnw + 1; + + int ix_bsw = ix_tnw; + int iy_bsw = iy_tnw + 1; + int iz_bsw = iz_tnw + 1; + + int ix_bse = ix_tnw + 1; + int iy_bse = iy_tnw + 1; + int iz_bse = iz_tnw + 1; + + // get surfaces to each neighbor: + T tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz); + T tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz); + T tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz); + T tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz); + T bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse); + T bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw); + T bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne); + T bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw); + + T gix = static_cast(0), giy = static_cast(0), + giz = static_cast(0); + int gOut_offset = n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW; + int inp_offset_NC = n * inp_sN; + T* gInp_ptr_NC = grad_input + n * inp_sN; + for (int c = 0; c < out_c; ++c, + gOut_offset += gOut_sC, + gInp_ptr_NC += inp_sC, + inp_offset_NC += inp_sC) { + T gOut = grad_output[gOut_offset]; + + AtomicAdd3D(gInp_ptr_NC, + iz_tnw, + iy_tnw, + ix_tnw, + inp_sD, + inp_sH, + inp_sW, + in_d, + in_h, + in_w, + tnw * gOut); + AtomicAdd3D(gInp_ptr_NC, + iz_tne, + iy_tne, + ix_tne, + inp_sD, + inp_sH, + inp_sW, + in_d, + in_h, + in_w, + tne * gOut); + AtomicAdd3D(gInp_ptr_NC, + iz_tsw, + iy_tsw, + ix_tsw, + inp_sD, + inp_sH, + inp_sW, + in_d, + in_h, + in_w, + tsw * gOut); + AtomicAdd3D(gInp_ptr_NC, + iz_tse, + iy_tse, + ix_tse, + inp_sD, + inp_sH, + inp_sW, + in_d, + in_h, + in_w, + tse * gOut); + AtomicAdd3D(gInp_ptr_NC, + iz_bnw, + iy_bnw, + ix_bnw, + inp_sD, + inp_sH, + inp_sW, + in_d, + in_h, + in_w, + bnw * gOut); + AtomicAdd3D(gInp_ptr_NC, + iz_bne, + iy_bne, + ix_bne, + inp_sD, + inp_sH, + inp_sW, + in_d, + in_h, + in_w, + bne * gOut); + AtomicAdd3D(gInp_ptr_NC, + iz_bsw, + iy_bsw, + ix_bsw, + inp_sD, + inp_sH, + inp_sW, + in_d, + in_h, + in_w, + bsw * gOut); + AtomicAdd3D(gInp_ptr_NC, + iz_bse, + iy_bse, + ix_bse, + inp_sD, + inp_sH, + inp_sW, + in_d, + in_h, + in_w, + bse * gOut); + + // calculate grad_grid + if (InBounds3D(iz_tnw, iy_tnw, ix_tnw, in_d, in_h, in_w)) { + T tnw_val = input[inp_offset_NC + iz_tnw * inp_sD + iy_tnw * inp_sH + + ix_tnw * inp_sW]; + gix -= tnw_val * (iy_bse - iy) * (iz_bse - iz) * gOut; + giy -= tnw_val * (ix_bse - ix) * (iz_bse - iz) * gOut; + giz -= tnw_val * (ix_bse - ix) * (iy_bse - iy) * gOut; + } + if (InBounds3D(iz_tne, iy_tne, ix_tne, in_d, in_h, in_w)) { + T tne_val = input[inp_offset_NC + iz_tne * inp_sD + iy_tne * inp_sH + + ix_tne * inp_sW]; + gix += tne_val * (iy_bsw - iy) * (iz_bsw - iz) * gOut; + giy -= tne_val * (ix - ix_bsw) * (iz_bsw - iz) * gOut; + giz -= tne_val * (ix - ix_bsw) * (iy_bsw - iy) * gOut; + } + if (InBounds3D(iz_tsw, iy_tsw, ix_tsw, in_d, in_h, in_w)) { + T tsw_val = input[inp_offset_NC + iz_tsw * inp_sD + iy_tsw * inp_sH + + ix_tsw * inp_sW]; + gix -= tsw_val * (iy - iy_bne) * (iz_bne - iz) * gOut; + giy += tsw_val * (ix_bne - ix) * (iz_bne - iz) * gOut; + giz -= tsw_val * (ix_bne - ix) * (iy - iy_bne) * gOut; + } + if (InBounds3D(iz_tse, iy_tse, ix_tse, in_d, in_h, in_w)) { + T tse_val = input[inp_offset_NC + iz_tse * inp_sD + iy_tse * inp_sH + + ix_tse * inp_sW]; + gix += tse_val * (iy - iy_bnw) * (iz_bnw - iz) * gOut; + giy += tse_val * (ix - ix_bnw) * (iz_bnw - iz) * gOut; + giz -= tse_val * (ix - ix_bnw) * (iy - iy_bnw) * gOut; + } + if 
(InBounds3D(iz_bnw, iy_bnw, ix_bnw, in_d, in_h, in_w)) { + T bnw_val = input[inp_offset_NC + iz_bnw * inp_sD + iy_bnw * inp_sH + + ix_bnw * inp_sW]; + gix -= bnw_val * (iy_tse - iy) * (iz - iz_tse) * gOut; + giy -= bnw_val * (ix_tse - ix) * (iz - iz_tse) * gOut; + giz += bnw_val * (ix_tse - ix) * (iy_tse - iy) * gOut; + } + if (InBounds3D(iz_bne, iy_bne, ix_bne, in_d, in_h, in_w)) { + T bne_val = input[inp_offset_NC + iz_bne * inp_sD + iy_bne * inp_sH + + ix_bne * inp_sW]; + gix += bne_val * (iy_tsw - iy) * (iz - iz_tsw) * gOut; + giy -= bne_val * (ix - ix_tsw) * (iz - iz_tsw) * gOut; + giz += bne_val * (ix - ix_tsw) * (iy_tsw - iy) * gOut; + } + if (InBounds3D(iz_bsw, iy_bsw, ix_bsw, in_d, in_h, in_w)) { + T bsw_val = input[inp_offset_NC + iz_bsw * inp_sD + iy_bsw * inp_sH + + ix_bsw * inp_sW]; + gix -= bsw_val * (iy - iy_tne) * (iz - iz_tne) * gOut; + giy += bsw_val * (ix_tne - ix) * (iz - iz_tne) * gOut; + giz += bsw_val * (ix_tne - ix) * (iy - iy_tne) * gOut; + } + if (InBounds3D(iz_bse, iy_bse, ix_bse, in_d, in_h, in_w)) { + T bse_val = input[inp_offset_NC + iz_bse * inp_sD + iy_bse * inp_sH + + ix_bse * inp_sW]; + gix += bse_val * (iy - iy_tnw) * (iz - iz_tnw) * gOut; + giy += bse_val * (ix - ix_tnw) * (iz - iz_tnw) * gOut; + giz += bse_val * (ix - ix_tnw) * (iy - iy_tnw) * gOut; + } + } + if (grad_grid != nullptr) { + T* gGrid_ptr_NDHW = grad_grid + index * grid_sW; + gGrid_ptr_NDHW[0] = gix_mult * gix; + gGrid_ptr_NDHW[1] = giy_mult * giy; + gGrid_ptr_NDHW[2] = giz_mult * giz; + } + } else if (mode == Mode::nearest) { + auto ix_nearest = static_cast(std::round(ix)); + auto iy_nearest = static_cast(std::round(iy)); + auto iz_nearest = static_cast(std::round(iz)); + + // assign nearest neighor pixel value to output pixel + int gOut_offset = n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW; + T* gInp_ptr_NC = grad_input + n * inp_sN; + for (int c = 0; c < out_c; + ++c, gOut_offset += gOut_sC, gInp_ptr_NC += inp_sC) { + AtomicAdd3D(gInp_ptr_NC, + iz_nearest, + iy_nearest, + ix_nearest, + inp_sD, + inp_sH, + inp_sW, + in_d, + in_h, + in_w, + grad_output[gOut_offset]); + } + if (grad_grid != nullptr) { + T* gGrid_ptr_NDHW = grad_grid + index * grid_sW; + gGrid_ptr_NDHW[0] = static_cast(0); + gGrid_ptr_NDHW[1] = static_cast(0); + gGrid_ptr_NDHW[2] = static_cast(0); + } + } + } +} + template void GridSampleGradKernel(const Context& dev_ctx, const DenseTensor& x, @@ -275,42 +591,84 @@ void GridSampleGradKernel(const Context& dev_ctx, enum_mode = Mode::bilinear; } - const int n = grid.dims()[0]; - const int out_h = grid.dims()[1]; - const int out_w = grid.dims()[2]; - const int c = x.dims()[1]; - const int in_h = x.dims()[2]; - const int in_w = x.dims()[3]; + if (x.dims().size() == 4) { + const int n = grid.dims()[0]; + const int out_h = grid.dims()[1]; + const int out_w = grid.dims()[2]; + const int c = x.dims()[1]; + const int in_h = x.dims()[2]; + const int in_w = x.dims()[3]; - dev_ctx.template Alloc(x_grad); - phi::funcs::SetConstant()(dev_ctx, x_grad, static_cast(0)); + dev_ctx.template Alloc(x_grad); + phi::funcs::SetConstant()(dev_ctx, x_grad, static_cast(0)); - T* grid_grad_data = nullptr; - if (grid_grad != nullptr) { - grid_grad_data = dev_ctx.template Alloc(grid_grad); - } + T* grid_grad_data = nullptr; + if (grid_grad != nullptr) { + grid_grad_data = dev_ctx.template Alloc(grid_grad); + } + + int count = static_cast(n * out_h * out_w); + auto cu_stream = dev_ctx.stream(); + backends::gpu::GpuLaunchConfig config = + backends::gpu::GetGpuLaunchConfig1D(dev_ctx, count); + 
GridSamplerCudaBackwardKernel + <<>>( + count, + out_grad.data(), + x.data(), + grid.data(), + n, + c, + out_h, + out_w, + in_h, + in_w, + x_grad->data(), + grid_grad_data, + enum_mode, + enum_padding_mode, + align_corners); + } else { + const int out_d = grid.dims()[1]; + const int out_h = grid.dims()[2]; + const int out_w = grid.dims()[3]; + const int n = x.dims()[0]; + const int c = x.dims()[1]; + const int in_d = x.dims()[2]; + const int in_h = x.dims()[3]; + const int in_w = x.dims()[4]; - int count = static_cast(n * out_h * out_w); - auto cu_stream = dev_ctx.stream(); - backends::gpu::GpuLaunchConfig config = - backends::gpu::GetGpuLaunchConfig1D(dev_ctx, count); - GridSamplerCudaBackwardKernel - <<>>( - count, - out_grad.data(), - x.data(), - grid.data(), - n, - c, - out_h, - out_w, - in_h, - in_w, - x_grad->data(), - grid_grad_data, - enum_mode, - enum_padding_mode, - align_corners); + dev_ctx.template Alloc(x_grad); + phi::funcs::SetConstant()(dev_ctx, x_grad, static_cast(0)); + + T* grid_grad_data = nullptr; + if (grid_grad != nullptr) { + grid_grad_data = dev_ctx.template Alloc(grid_grad); + } + + int count = static_cast(n * out_d * out_h * out_w); + auto cu_stream = dev_ctx.stream(); + backends::gpu::GpuLaunchConfig config = + backends::gpu::GetGpuLaunchConfig1D(dev_ctx, count); + GridSampler3DCudaBackwardKernel + <<>>( + count, + out_grad.data(), + x.data(), + grid.data(), + c, + out_d, + out_h, + out_w, + in_d, + in_h, + in_w, + x_grad->data(), + grid_grad_data, + enum_mode, + enum_padding_mode, + align_corners); + } } } // namespace phi diff --git a/paddle/phi/kernels/gpu/grid_sample_kernel.cu b/paddle/phi/kernels/gpu/grid_sample_kernel.cu index 0bf0435240a..ff657d9dc46 100644 --- a/paddle/phi/kernels/gpu/grid_sample_kernel.cu +++ b/paddle/phi/kernels/gpu/grid_sample_kernel.cu @@ -169,6 +169,169 @@ __global__ void GridSampleCudaKernel(const int nthreads, } } +template +__global__ void GridSample3DCudaKernel(const int nthreads, + int out_c, + int out_d, + int out_h, + int out_w, + int in_d, + int in_h, + int in_w, + const T* input, + const T* grid, + T* output, + const Mode interpolation_mode, + const PaddingMode padding_mode, + bool align_corners) { + int inp_sW = 1; + int inp_sH = in_w; + int inp_sD = in_h * in_w; + int inp_sC = in_d * inp_sD; + int inp_sN = out_c * inp_sC; + + int grid_sCoor = 1; + int grid_sW = 3; + int grid_sH = out_w * grid_sW; + int grid_sD = out_h * grid_sH; + int grid_sN = out_d * grid_sD; + + int out_sW = 1; + int out_sH = out_w; + int out_sD = out_h * out_w; + int out_sC = out_d * out_sD; + int out_sN = out_c * out_sC; + + CUDA_KERNEL_LOOP_TYPE(index, nthreads, int) { + const int w = index % out_w; + const int h = (index / out_w) % out_h; + const int d = (index / (out_h * out_w)) % out_d; + const int n = index / (out_d * out_h * out_w); + const int grid_offset = + n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW; + // get the corresponding input x, y, z co-ordinates from grid + T ix = grid[grid_offset]; + T iy = grid[grid_offset + grid_sCoor]; + T iz = grid[grid_offset + 2 * grid_sCoor]; + ix = ComputePositions(ix, in_w, padding_mode, align_corners); + iy = ComputePositions(iy, in_h, padding_mode, align_corners); + iz = ComputePositions(iz, in_d, padding_mode, align_corners); + if (interpolation_mode == Mode::bilinear) { + // get corner pixel values from (x, y, z) + // for 4d, we used north-east-south-west + // for 5d, we add top-bottom + int ix_tnw = static_cast(std::floor(ix)); + int iy_tnw = static_cast(std::floor(iy)); + int iz_tnw = 
static_cast(std::floor(iz)); + + int ix_tne = ix_tnw + 1; + int iy_tne = iy_tnw; + int iz_tne = iz_tnw; + + int ix_tsw = ix_tnw; + int iy_tsw = iy_tnw + 1; + int iz_tsw = iz_tnw; + + int ix_tse = ix_tnw + 1; + int iy_tse = iy_tnw + 1; + int iz_tse = iz_tnw; + + int ix_bnw = ix_tnw; + int iy_bnw = iy_tnw; + int iz_bnw = iz_tnw + 1; + + int ix_bne = ix_tnw + 1; + int iy_bne = iy_tnw; + int iz_bne = iz_tnw + 1; + + int ix_bsw = ix_tnw; + int iy_bsw = iy_tnw + 1; + int iz_bsw = iz_tnw + 1; + + int ix_bse = ix_tnw + 1; + int iy_bse = iy_tnw + 1; + int iz_bse = iz_tnw + 1; + + // get surfaces to each neighbor: + T tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz); + T tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz); + T tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz); + T tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz); + T bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse); + T bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw); + T bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne); + T bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw); + + auto inp_ptr_NC = input + n * inp_sN; + auto out_ptr_NCDHW = + output + n * out_sN + d * out_sD + h * out_sH + w * out_sW; + for (int c = 0; c < out_c; + ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) { + *out_ptr_NCDHW = static_cast(0); + if (InBounds3D(iz_tnw, iy_tnw, ix_tnw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tnw * inp_sD + iy_tnw * inp_sH + ix_tnw * inp_sW] * + tnw; + } + if (InBounds3D(iz_tne, iy_tne, ix_tne, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tne * inp_sD + iy_tne * inp_sH + ix_tne * inp_sW] * + tne; + } + if (InBounds3D(iz_tsw, iy_tsw, ix_tsw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tsw * inp_sD + iy_tsw * inp_sH + ix_tsw * inp_sW] * + tsw; + } + if (InBounds3D(iz_tse, iy_tse, ix_tse, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_tse * inp_sD + iy_tse * inp_sH + ix_tse * inp_sW] * + tse; + } + if (InBounds3D(iz_bnw, iy_bnw, ix_bnw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bnw * inp_sD + iy_bnw * inp_sH + ix_bnw * inp_sW] * + bnw; + } + if (InBounds3D(iz_bne, iy_bne, ix_bne, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bne * inp_sD + iy_bne * inp_sH + ix_bne * inp_sW] * + bne; + } + if (InBounds3D(iz_bsw, iy_bsw, ix_bsw, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bsw * inp_sD + iy_bsw * inp_sH + ix_bsw * inp_sW] * + bsw; + } + if (InBounds3D(iz_bse, iy_bse, ix_bse, in_d, in_h, in_w)) { + *out_ptr_NCDHW += + inp_ptr_NC[iz_bse * inp_sD + iy_bse * inp_sH + ix_bse * inp_sW] * + bse; + } + } + } else if (interpolation_mode == Mode::nearest) { + int ix_nearest = static_cast(std::round(ix)); + int iy_nearest = static_cast(std::round(iy)); + int iz_nearest = static_cast(std::round(iz)); + + // assign nearest neighor pixel value to output pixel + auto inp_ptr_NC = input + n * inp_sN; + auto out_ptr_NCDHW = + output + n * out_sN + d * out_sD + h * out_sH + w * out_sW; + for (int c = 0; c < out_c; + ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) { + if (InBounds3D(iz_nearest, iy_nearest, ix_nearest, in_d, in_h, in_w)) { + *out_ptr_NCDHW = + inp_ptr_NC[iz_nearest * inp_sD + iy_nearest * inp_sH + + ix_nearest * inp_sW]; + } else { + *out_ptr_NCDHW = static_cast(0); + } + } + } + } +} + template void GridSampleKernel(const Context& dev_ctx, const DenseTensor& x, @@ -193,38 +356,78 @@ void GridSampleKernel(const Context& dev_ctx, enum_mode = Mode::bilinear; } - const int n = grid.dims()[0]; - const int out_h = grid.dims()[1]; 
- const int out_w = grid.dims()[2]; - const int c = x.dims()[1]; - const int in_h = x.dims()[2]; - const int in_w = x.dims()[3]; - VLOG(3) << "n: " << n << "; c: " << c << "; out_h: " << out_h - << "; out_w: " << out_w; - - auto* output_data = dev_ctx.template Alloc(out); - VLOG(3) << "out dims: " << out->dims()[0] << "; " << out->dims()[1] << "; " - << out->dims()[2] << "; " << out->dims()[3]; - - int count = static_cast(n * out_h * out_w); - auto cu_stream = dev_ctx.stream(); - backends::gpu::GpuLaunchConfig config = - backends::gpu::GetGpuLaunchConfig1D(dev_ctx, count); - GridSampleCudaKernel - <<>>( - count, - n, - c, - out_h, - out_w, - in_h, - in_w, - x.data(), - grid.data(), - output_data, - enum_mode, - enum_padding_mode, - align_corners); + if (x.dims().size() == 4) { + const int n = grid.dims()[0]; + const int out_h = grid.dims()[1]; + const int out_w = grid.dims()[2]; + const int c = x.dims()[1]; + const int in_h = x.dims()[2]; + const int in_w = x.dims()[3]; + VLOG(3) << "n: " << n << "; c: " << c << "; out_h: " << out_h + << "; out_w: " << out_w; + + auto* output_data = dev_ctx.template Alloc(out); + VLOG(3) << "out dims: " << out->dims()[0] << "; " << out->dims()[1] << "; " + << out->dims()[2] << "; " << out->dims()[3]; + + int count = static_cast(n * out_h * out_w); + auto cu_stream = dev_ctx.stream(); + backends::gpu::GpuLaunchConfig config = + backends::gpu::GetGpuLaunchConfig1D(dev_ctx, count); + GridSampleCudaKernel + <<>>( + count, + n, + c, + out_h, + out_w, + in_h, + in_w, + x.data(), + grid.data(), + output_data, + enum_mode, + enum_padding_mode, + align_corners); + } else { + const int n = grid.dims()[0]; + const int out_d = grid.dims()[1]; + const int out_h = grid.dims()[2]; + const int out_w = grid.dims()[3]; + const int c = x.dims()[1]; + const int in_d = x.dims()[2]; + const int in_h = x.dims()[3]; + const int in_w = x.dims()[4]; + + VLOG(3) << "n: " << n << "; c: " << c << "; out_d: " << out_d + << "; out_h: " << out_h << "; out_w: " << out_w; + + auto* output_data = dev_ctx.template Alloc(out); + VLOG(3) << "out dims: " << out->dims()[0] << "; " << out->dims()[1] << "; " + << out->dims()[2] << "; " << out->dims()[3] << "; " + << out->dims()[4]; + + int count = static_cast(n * out_d * out_h * out_w); + auto cu_stream = dev_ctx.stream(); + backends::gpu::GpuLaunchConfig config = + backends::gpu::GetGpuLaunchConfig1D(dev_ctx, count); + GridSample3DCudaKernel + <<>>( + count, + c, + out_d, + out_h, + out_w, + in_d, + in_h, + in_w, + x.data(), + grid.data(), + output_data, + enum_mode, + enum_padding_mode, + align_corners); + } } } // namespace phi diff --git a/paddle/phi/kernels/gpu/grid_sample_utils.h b/paddle/phi/kernels/gpu/grid_sample_utils.h index 098eb9defb5..bd5e859a59d 100644 --- a/paddle/phi/kernels/gpu/grid_sample_utils.h +++ b/paddle/phi/kernels/gpu/grid_sample_utils.h @@ -27,4 +27,9 @@ static __forceinline__ __device__ bool InBounds(int h, int w, int H, int W) { return h >= 0 && h < H && w >= 0 && w < W; } +static __forceinline__ __device__ bool InBounds3D( + int d, int h, int w, int D, int H, int W) { + return d >= 0 && d < D && h >= 0 && h < H && w >= 0 && w < W; +} + } // namespace phi diff --git a/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py b/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py index 4b813330b40..8f9be0ddcab 100644 --- a/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py +++ b/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py @@ -20,6 +20,15 @@ from op_test import OpTest, skip_check_grad_ci 
paddle.enable_static() +from white_list import ( + op_accuracy_white_list, + check_shape_white_list, + compile_vs_runtime_white_list, + no_check_set_white_list, + op_threshold_white_list, + no_grad_set_white_list, +) + def AffineGrid(theta, grid_shape): n = grid_shape[0] @@ -64,6 +73,68 @@ def getGridPointValue(data, x, y): return out +def AffineGrid3D(theta, grid_shape): + n = grid_shape[0] + d = grid_shape[1] + h = grid_shape[2] + w = grid_shape[3] + d_idx = np.repeat(np.repeat(np.linspace(-1, 1, d)[:, np.newaxis, + np.newaxis], + h, + axis=1), + w, + axis=2)[:, :, :, np.newaxis] + h_idx = np.repeat(np.repeat(np.linspace(-1, 1, h)[np.newaxis, :, + np.newaxis], + w, + axis=2), + d, + axis=0)[:, :, :, np.newaxis] + w_idx = np.repeat(np.repeat(np.linspace(-1, 1, w)[np.newaxis, + np.newaxis, :], + h, + axis=1), + d, + axis=0)[:, :, :, np.newaxis] + grid = np.concatenate( + [w_idx, h_idx, d_idx, np.ones([d, h, w, 1])], axis=3) # d * h * w * 4 + grid = np.repeat(grid[np.newaxis, :], n, axis=0) # n * d * h * w *4 + ret = np.zeros([n, d * h * w, 3]) + theta = theta.transpose([0, 2, 1]) + for i in range(len(theta)): + ret[i] = np.dot(grid[i].reshape([d * h * w, 4]), theta[i]) + + return ret.reshape([n, d, h, w, 3]).astype("float64") + + +def getGridPointValue3D(data, x, y, z): + data_shape = data.shape + N = data_shape[0] + C = data_shape[1] + in_D = data_shape[2] + in_H = data_shape[3] + in_W = data_shape[4] + out_D = x.shape[1] + out_H = x.shape[2] + out_W = x.shape[3] + + #out = np.zeros(data_shape, dtype='float64') + out = np.zeros([N, C, out_D, out_H, out_W], dtype='float64') + for i in range(N): + for j in range(out_D): + for k in range(out_H): + for l in range(out_W): + if y[i, j, k, l] < 0 or y[i, j, k, l] > in_H - 1 or x[ + i, j, k, l] < 0 or x[i, j, k, l] > in_W - 1 or z[ + i, j, k, l] < 0 or z[i, j, k, l] > in_D - 1: + out[i, :, j, k, l] = 0 + else: + out[i, :, j, k, l] = data[i, :, z[i, j, k, l], + y[i, j, k, l], x[i, j, k, l]] + + return out + + def clip(x, min_n, max_n): return np.maximum(np.minimum(x, max_n), min_n) @@ -138,6 +209,80 @@ def GridSampler(data, return out +def GridSampler3D(data, + grid, + align_corners=True, + mode="bilinear", + padding_mode="zeros"): + dims = data.shape + N = dims[0] + in_C = dims[1] + in_D = dims[2] + in_H = dims[3] + in_W = dims[4] + + out_D = grid.shape[1] + out_H = grid.shape[2] + out_W = grid.shape[3] + + x = grid[:, :, :, :, 0] + y = grid[:, :, :, :, 1] + z = grid[:, :, :, :, 2] + + z_max = in_D - 1 + y_max = in_H - 1 + x_max = in_W - 1 + + x = unnormalizeAndClip(x, x_max, align_corners, padding_mode) + y = unnormalizeAndClip(y, y_max, align_corners, padding_mode) + z = unnormalizeAndClip(z, z_max, align_corners, padding_mode) + + if mode == "bilinear": + x0 = np.floor(x).astype('int32') + x1 = x0 + 1 + y0 = np.floor(y).astype('int32') + y1 = y0 + 1 + z0 = np.floor(z).astype('int32') + z1 = z0 + 1 + + w_tnw = np.tile(((x1 - x) * (y1 - y) * (z1 - z)).reshape( + (N, 1, out_D, out_H, out_W)), (1, in_C, 1, 1, 1)) + w_tne = np.tile(((x - x0) * (y1 - y) * (z1 - z)).reshape( + (N, 1, out_D, out_H, out_W)), (1, in_C, 1, 1, 1)) + w_tsw = np.tile(((x1 - x) * (y - y0) * (z1 - z)).reshape( + (N, 1, out_D, out_H, out_W)), (1, in_C, 1, 1, 1)) + w_tse = np.tile(((x - x0) * (y - y0) * (z1 - z)).reshape( + (N, 1, out_D, out_H, out_W)), (1, in_C, 1, 1, 1)) + w_bnw = np.tile(((x1 - x) * (y1 - y) * (z - z0)).reshape( + (N, 1, out_D, out_H, out_W)), (1, in_C, 1, 1, 1)) + w_bne = np.tile(((x - x0) * (y1 - y) * (z - z0)).reshape( + (N, 1, out_D, out_H, out_W)), (1, 
in_C, 1, 1, 1)) + w_bsw = np.tile(((x1 - x) * (y - y0) * (z - z0)).reshape( + (N, 1, out_D, out_H, out_W)), (1, in_C, 1, 1, 1)) + w_bse = np.tile(((x - x0) * (y - y0) * (z - z0)).reshape( + (N, 1, out_D, out_H, out_W)), (1, in_C, 1, 1, 1)) + + v_tnw = getGridPointValue3D(data, x0, y0, z0) + v_tne = getGridPointValue3D(data, x1, y0, z0) + v_tsw = getGridPointValue3D(data, x0, y1, z0) + v_tse = getGridPointValue3D(data, x1, y1, z0) + v_bnw = getGridPointValue3D(data, x0, y0, z1) + v_bne = getGridPointValue3D(data, x1, y0, z1) + v_bsw = getGridPointValue3D(data, x0, y1, z1) + v_bse = getGridPointValue3D(data, x1, y1, z1) + + out = (w_tnw * v_tnw + w_tne * v_tne + w_tsw * v_tsw + w_tse * v_tse + + w_bnw * v_bnw + w_bne * v_bne + w_bsw * v_bsw + + w_bse * v_bse).astype('float64') + + elif mode == "nearest": + x = np.round(x).astype('int32') + y = np.round(y).astype('int32') + z = np.round(z).astype('int32') + out = getGridPointValue3D(data, x, y, z) + return out + + class TestGridSamplerOp(OpTest): def setUp(self): @@ -150,36 +295,90 @@ class TestGridSamplerOp(OpTest): self.mode = "bilinear" self.initTestCase() x = np.random.randint(0, 255, self.x_shape).astype('float64') - theta = np.zeros(self.theta_shape).astype('float64') - for i in range(self.theta_shape[0]): - for j in range(2): - for k in range(3): - theta[i, j, k] = np.random.rand(1)[0] - grid = AffineGrid(theta, self.grid_shape) - - self.inputs = {'X': x, 'Grid': grid} - self.attrs = { - 'use_cudnn': self.use_cudnn, - "align_corners": self.align_corners, - "padding_mode": self.padding_mode, - "mode": self.mode - } - self.outputs = { - 'Output': - GridSampler(x, grid, self.align_corners, self.mode, - self.padding_mode) - } + + if len(self.grid_shape) == 4: + for i in range(self.theta_shape[0]): + for j in range(2): + for k in range(3): + theta[i, j, k] = np.random.rand(1)[0] + grid = AffineGrid(theta, self.grid_shape) + self.inputs = {'X': x, 'Grid': grid} + self.attrs = { + 'use_cudnn': self.use_cudnn, + "align_corners": self.align_corners, + "padding_mode": self.padding_mode, + "mode": self.mode + } + self.outputs = { + 'Output': + GridSampler(x, grid, self.align_corners, self.mode, + self.padding_mode) + } + else: + for i in range(self.theta_shape[0]): + for j in range(3): + for k in range(4): + theta[i, j, k] = np.random.rand(1)[0] + grid = AffineGrid3D(theta, self.grid_shape) + self.inputs = {'X': x, 'Grid': grid} + self.attrs = { + 'use_cudnn': self.use_cudnn, + "align_corners": self.align_corners, + "padding_mode": self.padding_mode, + "mode": self.mode + } + self.outputs = { + 'Output': + GridSampler3D(x, grid, self.align_corners, self.mode, + self.padding_mode) + } + + def get_places(self): + places = [] + if core.is_compiled_with_cuda(): + places.append(core.CUDAPlace(0)) + return places def test_check_output(self): - self.check_output(check_eager=True) + if len(self.grid_shape) == 4: + self.check_output(check_eager=True) + else: + check_eager_flag = True + check_dygraph_flag = False + for place in self.get_places(): + res = self.check_output_with_place( + place, + atol=1e-5, + check_dygraph=check_dygraph_flag, + check_eager=check_eager_flag) + if check_eager_flag: + assert check_dygraph_flag == False + outs, eager_dygraph_outs, fetch_list = res + elif check_dygraph_flag: + uts, dygraph_outs, fetch_list = res + else: + outs, fetch_list = res + if self.op_type not in compile_vs_runtime_white_list.COMPILE_RUN_OP_WHITE_LIST: + self.check_compile_vs_runtime(fetch_list, outs) def test_check_grad_normal(self): - 
self.check_grad(['X', 'Grid'], - 'Output', - max_relative_error=0.01, - numeric_grad_delta=self.numeric_grad_delta, - check_eager=True) + if len(self.grid_shape) == 4: + self.check_grad(['X', 'Grid'], + 'Output', + max_relative_error=0.01, + numeric_grad_delta=self.numeric_grad_delta, + check_eager=True) + else: + self._check_grad_helper() + for place in self.get_places(): + self.check_grad_with_place( + place, ['X'], + 'Output', + numeric_grad_delta=self.numeric_grad_delta, + max_relative_error=0.01, + check_eager=True, + check_dygraph=False) def initTestCase(self): self.x_shape = (2, 3, 8, 8) @@ -285,5 +484,102 @@ class Case5(LargeInputCase): self.use_cudnn = False if core.is_compiled_with_rocm() else True +class Case6(TestGridSamplerOp): + + def initTestCase(self): + self.x_shape = (2, 3, 5, 6, 7) + self.grid_shape = (2, 8, 9, 10, 3) + self.theta_shape = (2, 3, 4) + self.align_corners = False + self.padding_mode = "zeros" + self.mode = "bilinear" + + +class Case6_(TestGridSamplerOp): + + def get_places(self): + places = [] + if core.is_compiled_with_cuda(): + places.append(core.CUDAPlace(0)) + return places + + def initTestCase(self): + self.x_shape = (2, 3, 5, 6, 7) + self.grid_shape = (2, 8, 9, 10, 3) + self.theta_shape = (2, 3, 4) + self.align_corners = False + self.padding_mode = "border" + self.mode = "bilinear" + + +class Case7(TestGridSamplerOp): + + def initTestCase(self): + self.x_shape = (2, 3, 5, 6, 7) + self.grid_shape = (2, 8, 9, 10, 3) + self.theta_shape = (2, 3, 4) + self.align_corners = False + self.padding_mode = "reflection" + self.mode = "bilinear" + + +class Case8(TestGridSamplerOp): + + def initTestCase(self): + self.x_shape = (2, 3, 5, 6, 7) + self.grid_shape = (2, 8, 9, 10, 3) + self.theta_shape = (2, 3, 4) + self.align_corners = True + self.padding_mode = "reflection" + self.mode = "bilinear" + + +class Case9(TestGridSamplerOp): + + def initTestCase(self): + self.x_shape = (2, 3, 5, 6, 7) + self.grid_shape = (2, 8, 9, 10, 3) + self.theta_shape = (2, 3, 4) + self.align_corners = False + self.padding_mode = "reflection" + self.mode = "nearest" + self.numeric_grad_delta = 0.0001 + + +@skip_check_grad_ci(reason="'check_grad' on large inputs is too slow, " + + "however it is desirable to cover the forward pass") +class LargeInput3DCase(TestGridSamplerOp): + + def initTestCase(self): + self.no_need_check_grad = True + self.x_shape = (2, 3, 24, 24, 12) + self.grid_shape = (2, 25, 25, 12, 3) + self.theta_shape = (2, 3, 4) + self.align_corners = False + self.padding_mode = "reflection" + self.mode = "bilinear" + self.use_cudnn = False + self.__class__.op_type = 'grid_sampler' + + def test_check_grad_normal(self): + pass + + +@skip_check_grad_ci(reason="'check_grad' on large inputs is too slow, " + + "however it is desirable to cover the forward pass") +class Case10(LargeInput3DCase): + + def initTestCase(self): + self.no_need_check_grad = True + self.x_shape = (2, 3, 24, 24, 12) + self.grid_shape = (2, 25, 25, 12, 3) + self.theta_shape = (2, 3, 4) + self.align_corners = True + self.padding_mode = "zeros" + self.mode = "bilinear" + self.use_cudnn = False + self.__class__.op_type = 'grid_sampler' + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index 293b055d8e2..b94e6ec95d1 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -127,12 +127,21 @@ def grid_sample(x, """ This operation samples input X by using bilinear interpolation or nearest 
interpolation based on flow field grid, which is usually - generated by :code:`affine_grid` . The grid of shape [N, H, W, 2] - is the concatenation of (x, y) coordinates with shape [N, H, W] each, - where x is indexing the 4th dimension (in width dimension) of input - data x and y is indexing the 3rd dimension (in height dimension), - finally results is the bilinear interpolation or nearest value of 4 nearest corner - points. The output tensor shape will be [N, C, H, W]. + generated by :code:`affine_grid` . When the input X is 4-D Tensor, + the grid of shape [N, H, W, 2] is the concatenation of (x, y) + coordinates with shape [N, H, W] each, where x is indexing the 4th + dimension (in width dimension) of input data x and y is indexing + the 3rd dimension (in height dimension), finally results is the + bilinear interpolation or nearest value of 4 nearest corner + points. The output tensor shape will be [N, C, H, W]. When the input X + is 5-D Tensor, the grid of shape [N, D, H, W, 3] is the concatenation + of (x, y, z) coordinates with shape [N, D, H, W] each, where x is + indexing the 5th dimension (in width dimension) of input data x, y is + indexing the 4th dimension (in height dimension) and z is indexing the + 3rd dimension (in depth dimension) finally results is the bilinear + interpolation or nearest value of 8 nearest cornerpoints. The output + tensor shape will be [N, C, D, H, W]. + Step 1: @@ -181,11 +190,13 @@ def grid_sample(x, Args: x(Tensor): The input tensor, which is a 4-d tensor with shape - [N, C, H, W], N is the batch size, C is the channel - number, H and W is the feature height and width. + [N, C, H, W] or a 5-d tensor with shape [N, C, D, H, W], + N is the batch size, C is the channel number, + D, H and W is the feature depth, height and width. The data type is float32 or float64. - grid(Tensor): Input grid tensor of shape [N, grid_H, grid_W, 2]. The - data type is float32 or float64. + grid(Tensor): Input grid tensor, which is a 4-d tensor with shape [N, grid_H, + grid_W, 2] or a 5-d tensor with shape [N, grid_D, grid_H, + grid_W, 3]. The data type is float32 or float64. mode(str, optional): The interpolation method which can be 'bilinear' or 'nearest'. Default: 'bilinear'. padding_mode(str, optional) The padding method used when source index @@ -199,7 +210,8 @@ def grid_sample(x, None by default. Returns: - Tensor, The shape of output is [N, C, grid_H, grid_W] in which `grid_H` is the height of grid and `grid_W` is the width of grid. The data type is same as input tensor. + Tensor, The shape of output is [N, C, grid_H, grid_W] or [N, C, grid_D, grid_H, grid_W] in which `grid_D` is the depth of grid, + `grid_H` is the height of grid and `grid_W` is the width of grid. The data type is same as input tensor. Examples: @@ -207,31 +219,24 @@ def grid_sample(x, import paddle import paddle.nn.functional as F - import numpy as np - - # shape=[1, 1, 3, 3] - x = np.array([[[[-0.6, 0.8, -0.5], - [-0.5, 0.2, 1.2], - [ 1.4, 0.3, -0.2]]]]).astype("float64") + # x shape=[1, 1, 3, 3] + x = paddle.to_tensor([[[[-0.6, 0.8, -0.5], + [-0.5, 0.2, 1.2], + [ 1.4, 0.3, -0.2]]]],dtype='float64') # grid shape = [1, 3, 4, 2] - grid = np.array( - [[[[ 0.2, 0.3], - [-0.4, -0.3], - [-0.9, 0.3], - [-0.9, -0.6]], - [[ 0.4, 0.1], - [ 0.9, -0.8], - [ 0.4, 0.5], - [ 0.5, -0.2]], - [[ 0.1, -0.8], - [-0.3, -1. 
], - [ 0.7, 0.4], - [ 0.2, 0.8]]]]).astype("float64") - - - x = paddle.to_tensor(x) - grid = paddle.to_tensor(grid) + grid = paddle.to_tensor([[[[ 0.2, 0.3], + [-0.4, -0.3], + [-0.9, 0.3], + [-0.9, -0.6]], + [[ 0.4, 0.1], + [ 0.9, -0.8], + [ 0.4, 0.5], + [ 0.5, -0.2]], + [[ 0.1, -0.8], + [-0.3, -1. ], + [ 0.7, 0.4], + [ 0.2, 0.8]]]],dtype='float64') y_t = F.grid_sample( x, grid, -- GitLab
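
Note: below is a minimal usage sketch of the 5-D path this patch enables, using the shapes from Case6 in the updated unit test ([N, C, D, H, W] input and [N, D, H, W, 3] grid). The random tensors are illustrative only; since this patch adds the 3-D sampling kernels for the GPU backend only, it assumes a CUDA build of Paddle that already contains the change.

    import paddle
    import paddle.nn.functional as F

    # The 3-D grid_sample path in this patch is implemented in the CUDA kernels,
    # so place the computation on the GPU (assumes a CUDA build of Paddle).
    paddle.set_device('gpu')

    # 5-D input: [N, C, D, H, W]
    x = paddle.rand([2, 3, 5, 6, 7], dtype='float64')
    # 5-D grid: [N, D, H, W, 3], holding normalized (x, y, z) coordinates in [-1, 1]
    grid = paddle.rand([2, 8, 9, 10, 3], dtype='float64') * 2.0 - 1.0

    # Output follows the grid's spatial dims: [N, C, grid_D, grid_H, grid_W] -> [2, 3, 8, 9, 10]
    y = F.grid_sample(x, grid, mode='bilinear',
                      padding_mode='zeros', align_corners=False)
    print(y.shape)

With mode='bilinear' the 5-D case performs trilinear interpolation over the 8 surrounding corner points, mirroring the GridSampler3D reference implementation added to the unit test above.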