Commit ff6329bd, authored by: D dengkaipeng

fix some inappropriate expressions in api doc for grid_sampler. test=develop

Parent 593e1b18
@@ -22,107 +22,111 @@ using framework::Tensor;
using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
using DataLayout = platform::DataLayout;
using ScopedSpatialTransformerDescriptor =
    platform::ScopedSpatialTransformerDescriptor;
template <typename T>
using CudnnDataType = platform::CudnnDataType<T>;

template <typename T>
class CUDNNGridSampleOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use CUDAPlace");
    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    auto handle = dev_ctx.cudnn_handle();
    auto* input = ctx.Input<Tensor>("X");
    auto* grid = ctx.Input<Tensor>("Grid");
    auto* output = ctx.Output<Tensor>("Output");

    int n = input->dims()[0];
    int c = input->dims()[1];
    int h = input->dims()[2];
    int w = input->dims()[3];
    const int size[4] = {n, c, h, w};

    const T* input_data = input->data<T>();
    const T* grid_data = grid->data<T>();
    T* output_data = output->mutable_data<T>({n, c, h, w}, ctx.GetPlace());

    ScopedSpatialTransformerDescriptor st_desc;
    cudnnSpatialTransformerDescriptor_t cudnn_st_desc =
        st_desc.descriptor<T>(4, size);

    ScopedTensorDescriptor input_desc;
    ScopedTensorDescriptor output_desc;
    cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
        DataLayout::kNCHW, framework::vectorize2int(input->dims()));
    cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
        DataLayout::kNCHW, framework::vectorize2int(output->dims()));

    CUDNN_ENFORCE(platform::dynload::cudnnSpatialTfSamplerForward(
        handle, cudnn_st_desc, CudnnDataType<T>::kOne(), cudnn_input_desc,
        input_data, grid_data, CudnnDataType<T>::kZero(), cudnn_output_desc,
        output_data));
  }
};

template <typename T>
class CUDNNGridSampleGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use CUDAPlace");
    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    auto handle = dev_ctx.cudnn_handle();
    auto* input = ctx.Input<Tensor>("X");
    auto* grid = ctx.Input<Tensor>("Grid");
    auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
    auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
    auto* grid_grad = ctx.Output<Tensor>(framework::GradVarName("Grid"));

    auto output_grad_dims = output_grad->dims();
    const int n = output_grad_dims[0];
    const int c = output_grad_dims[1];
    const int h = output_grad_dims[2];
    const int w = output_grad_dims[3];
    const int size[4] = {n, c, h, w};

    ScopedSpatialTransformerDescriptor st_dest;
    cudnnSpatialTransformerDescriptor_t cudnn_st_dest =
        st_dest.descriptor<T>(4, size);

    const T* input_data = input->data<T>();
    const T* grid_data = grid->data<T>();
    const T* output_grad_data = output_grad->data<T>();
    T* input_grad_data =
        input_grad->mutable_data<T>(output_grad_dims, ctx.GetPlace());
    T* grid_grad_data =
        grid_grad->mutable_data<T>({n, h, w, 2}, ctx.GetPlace());

    ScopedTensorDescriptor input_desc;
    ScopedTensorDescriptor input_grad_desc;
    ScopedTensorDescriptor output_grad_desc;
    cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
        DataLayout::kNCHW, framework::vectorize2int(input->dims()));
    cudnnTensorDescriptor_t cudnn_input_grad_desc =
        input_grad_desc.descriptor<T>(
            DataLayout::kNCHW, framework::vectorize2int(input_grad->dims()));
    cudnnTensorDescriptor_t cudnn_output_grad_desc =
        output_grad_desc.descriptor<T>(
            DataLayout::kNCHW, framework::vectorize2int(output_grad->dims()));

    CUDNN_ENFORCE(platform::dynload::cudnnSpatialTfSamplerBackward(
        handle, cudnn_st_dest, CudnnDataType<T>::kOne(), cudnn_input_desc,
        input_data, CudnnDataType<T>::kZero(), cudnn_input_grad_desc,
        input_grad_data, CudnnDataType<T>::kOne(), cudnn_output_grad_desc,
        output_grad_data, grid_data, CudnnDataType<T>::kZero(),
        grid_grad_data));
  }
};

}  // namespace operators
}  // namespace paddle

namespace plat = paddle::platform;
REGISTER_OP_KERNEL(grid_sampler, CUDNN, plat::CUDAPlace,
                   paddle::operators::CUDNNGridSampleOpKernel<float>,
                   paddle::operators::CUDNNGridSampleOpKernel<double>);
REGISTER_OP_KERNEL(grid_sampler_grad, CUDNN, plat::CUDAPlace,
                   paddle::operators::CUDNNGridSampleGradOpKernel<float>,
                   paddle::operators::CUDNNGridSampleGradOpKernel<double>);
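For orientation, the forward call above asks cuDNN's spatial-transformer sampler to bilinearly interpolate X at the (x, y) locations given by Grid, with grid coordinates normalized to [-1, 1]; the kOne()/kZero() arguments are the usual cuDNN alpha/beta scaling factors. A minimal NumPy sketch of that contract follows. The helper name is illustrative, and the zero-fill for out-of-bound corners mirrors the CPU kernel's isInBound handling rather than anything guaranteed by the cuDNN API:

    import numpy as np

    def grid_sample_ref(x, grid):
        """Bilinear grid sampling. x: [N, C, H, W]; grid: [N, H, W, 2] in [-1, 1]."""
        n, c, h, w = x.shape
        out = np.zeros((n, c, h, w), dtype=x.dtype)
        for i in range(n):
            for k in range(h):
                for l in range(w):
                    # scale normalized coords to pixel coords (Step 1 of the op doc)
                    gx = (grid[i, k, l, 0] + 1.0) * (w - 1) / 2.0
                    gy = (grid[i, k, l, 1] + 1.0) * (h - 1) / 2.0
                    x0, y0 = int(np.floor(gx)), int(np.floor(gy))
                    dx, dy = gx - x0, gy - y0
                    # accumulate the 4 corner contributions, zero outside the image
                    for xi, yi, wgt in ((x0, y0, (1 - dx) * (1 - dy)),
                                        (x0 + 1, y0, dx * (1 - dy)),
                                        (x0, y0 + 1, (1 - dx) * dy),
                                        (x0 + 1, y0 + 1, dx * dy)):
                        if 0 <= xi < w and 0 <= yi < h:
                            out[i, :, k, l] += wgt * x[i, :, yi, xi]
        return out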
@@ -24,70 +24,76 @@ namespace operators {

using Tensor = framework::Tensor;

class GridSampleOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of GridSampleOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Grid"),
                   "Input(Grid) of GridSampleOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Output"),
                   "Output(Output) of GridSampleOp should not be null.");

    auto x_dims = ctx->GetInputDim("X");
    auto grid_dims = ctx->GetInputDim("Grid");
    PADDLE_ENFORCE(x_dims.size() == 4,
                   "Input(X) of GridSampleOp should be 4-D Tensor.");
    PADDLE_ENFORCE(grid_dims.size() == 4,
                   "Input(Grid) of GridSampleOp should be 4-D Tensor.");
    PADDLE_ENFORCE(grid_dims[3] == 2, "Input(Grid) dims[3] should be 2.");
    PADDLE_ENFORCE_EQ(grid_dims[0], x_dims[0],
                      "Input(X) and Input(Grid) dims[0] should be equal.");
    PADDLE_ENFORCE_EQ(
        grid_dims[1], x_dims[2],
        "Input(X) dims[2] and Input(Grid) dims[1] should be equal.");
    PADDLE_ENFORCE_EQ(
        grid_dims[2], x_dims[3],
        "Input(X) dims[3] and Input(Grid) dims[2] should be equal.");

    ctx->SetOutputDim("Output", x_dims);
    ctx->ShareLoD("X", "Output");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    framework::LibraryType library_{framework::LibraryType::kPlain};
#ifdef PADDLE_WITH_CUDA
    if (platform::CanCUDNNBeUsed(ctx)) {
      library_ = framework::LibraryType::kCUDNN;
    }
#endif
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
        framework::DataLayout::kAnyLayout, library_);
  }
};

class GridSampleOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(Tensor) The input data of GridSampleOp, "
             "This is a 4-D tensor with shape of [N, C, H, W]");
    AddInput(
        "Grid",
        "(Tensor) The input grid of GridSampleOp generated by AffineGridOp, "
        "This is a 4-D tensor with shape of [N, H, W, 2], which is the "
        "concatenation of x and y coordinates with shape [N, H, W] along "
        "the last dimension");
    AddOutput("Output", "(Tensor) Output tensor with shape [N, C, H, W]");
    AddAttr<bool>(
        "use_cudnn",
        "(bool, default true) Only used in cudnn kernel, need install cudnn")
        .SetDefault(true);

    AddComment(R"DOC(
      This operation samples input X by using bilinear interpolation based on
      flow field grid, which is usually generated by affine_grid. The grid of
      shape [N, H, W, 2] is the concatenation of (grid_x, grid_y) coordinates
      with shape [N, H, W] each, where grid_x indexes the 4th dimension
      (width dimension) of input data X and grid_y indexes the 3rd dimension
      (height dimension); the final result is the bilinear interpolation value
      of the 4 nearest corner points.

      Step 1:
        Get (x, y) grid coordinates and scale to [0, H-1/W-1].
@@ -127,11 +133,11 @@ class GridSampleOpMaker : public framework::OpProtoAndCheckerMaker {
        output = wn * d_e * d_s + en * d_w * d_s
               + ws * d_e * d_n + es * d_w * d_n
    )DOC");
  }
};

class GridSampleOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
  void InferShape(framework::InferShapeContext* ctx) const override {
    auto input_dims = ctx->GetInputDim("X");
@@ -144,43 +150,43 @@ class GridSampleOpGrad : public framework::OperatorWithKernel {
    }
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    framework::LibraryType library_{framework::LibraryType::kPlain};
#ifdef PADDLE_WITH_CUDA
    if (platform::CanCUDNNBeUsed(ctx)) {
      library_ = framework::LibraryType::kCUDNN;
    }
#endif
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
        framework::DataLayout::kAnyLayout, library_);
  }
};

class GridSampleGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto* op = new framework::OpDesc();
    op->SetType("grid_sampler_grad");
    op->SetInput("X", Input("X"));
    op->SetInput("Grid", Input("Grid"));
    op->SetInput(framework::GradVarName("Output"), OutputGrad("Output"));
    op->SetAttrMap(Attrs());
    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    op->SetOutput(framework::GradVarName("Grid"), InputGrad("Grid"));
    return std::unique_ptr<framework::OpDesc>(op);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(grid_sampler, ops::GridSampleOp, ops::GridSampleOpMaker,
...
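As a sanity check on the corner-weight formula the DOC string ends with (output = wn * d_e * d_s + ...), here is a hand-worked case with arbitrary corner values. Each corner value is weighted by the distances to the opposite corner, and the four weights sum to 1:

    # sample point at (x, y) = (1.25, 2.75) in pixel coordinates
    d_w, d_e = 0.25, 0.75  # distances to the west/east corners (d_w + d_e == 1)
    d_n, d_s = 0.75, 0.25  # distances to the north/south corners (d_n + d_s == 1)
    wn, en, ws, es = 1.0, 2.0, 3.0, 4.0  # hypothetical input values at the 4 corners

    output = wn * d_e * d_s + en * d_w * d_s + ws * d_e * d_n + es * d_w * d_n
    # = 0.1875 + 0.125 + 1.6875 + 0.75 = 2.75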
@@ -19,19 +19,17 @@ limitations under the License. */
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/hostdevice.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
template <typename T, size_t D, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;

using Array3 = Eigen::DSizes<int64_t, 3>;
using Array4 = Eigen::DSizes<int64_t, 4>;

template <typename T>
static inline bool isInBound(T x, T y, T x_max, T y_max) {
  if (x < 0 || x > x_max || y < 0 || y > y_max) {
@@ -40,16 +38,17 @@ static inline bool isInBound(T x, T y, T x_max, T y_max) {
  return true;
}

template <typename T>
static void CalcGridLocations(const platform::CPUDeviceContext& ctx,
                              const Tensor& grid, Tensor* x_w, Tensor* x_e,
                              Tensor* y_n, Tensor* y_s, Tensor* d_w,
                              Tensor* d_e, Tensor* d_n, Tensor* d_s) {
  auto& place = *ctx.eigen_device();
  const int n = grid.dims()[0];
  const int h = grid.dims()[1];
  const int w = grid.dims()[2];
  const T x_max = static_cast<T>(w - 1);
  const T y_max = static_cast<T>(h - 1);

  // split grid with shape (n, h, w, 2) into (x, y) by the 3rd Dim
  Tensor grid_x, grid_y;
@@ -102,7 +101,7 @@ static void CalcGridLocations(const platform::CPUDeviceContext& ctx,
template <typename T>
static void GetGridPointValue(const Tensor& input, Tensor* output,
                              const Tensor& x, const Tensor& y) {
  const int n = input.dims()[0];
  const int c = input.dims()[1];
  const int h = input.dims()[2];
@@ -117,7 +116,9 @@ static void GetGridPointValue(const Tensor& input, Tensor* output,
      for (int l = 0; l < w; l++) {
        if (isInBound(x_t(i, k, l), y_t(i, k, l), (T)(w - 1), (T)(h - 1))) {
          for (int j = 0; j < c; j++) {
            output_t(i, j, k, l) =
                input_t(i, j, static_cast<int>(round(y_t(i, k, l))),
                        static_cast<int>(round(x_t(i, k, l))));
          }
        }
      }
@@ -126,9 +127,10 @@ static void GetGridPointValue(const Tensor& input, Tensor* output,
}

template <typename T>
static void GatherOutputGradToInputGrad(const Tensor& output_grad,
                                        Tensor* input_grad, const Tensor& x,
                                        const Tensor& y, const Tensor& d1,
                                        const Tensor& d2) {
  const int n = output_grad.dims()[0];
  const int c = output_grad.dims()[1];
  const int h = output_grad.dims()[2];
@@ -143,10 +145,11 @@ static void GatherOutputGradToInputGrad(const Tensor& output_grad,
  for (int i = 0; i < n; i++) {
    for (int k = 0; k < h; k++) {
      for (int l = 0; l < w; l++) {
        if (isInBound(x_t(i, k, l), y_t(i, k, l), (T)(w - 1), (T)(h - 1))) {
          for (int j = 0; j < c; j++) {
            input_grad_t(i, j, static_cast<int>(round(y_t(i, k, l))),
                         static_cast<int>(round(x_t(i, k, l)))) +=
                output_grad_t(i, j, k, l) * d1_t(i, k, l) * d2_t(i, k, l);
          }
        }
      }
@@ -154,162 +157,166 @@ static void GatherOutputGradToInputGrad(const Tensor& output_grad,
  }
}

template <typename DeviceContext, typename T>
class GridSampleOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
    auto* input = ctx.Input<Tensor>("X");
    auto* grid = ctx.Input<Tensor>("Grid");

    const int n = input->dims()[0];
    const int c = input->dims()[1];
    const int h = input->dims()[2];
    const int w = input->dims()[3];

    // calc locations and distances of 4 corner points
    Tensor x_w, x_e, y_n, y_s;
    Tensor d_w, d_e, d_n, d_s;
    CalcGridLocations<T>(
        ctx.template device_context<platform::CPUDeviceContext>(), *grid, &x_w,
        &x_e, &y_n, &y_s, &d_w, &d_e, &d_n, &d_s);

    auto* output = ctx.Output<Tensor>("Output");
    output->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    math::SetConstant<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(), output,
        static_cast<T>(0));

    // calc 4 corner points value
    Tensor v_wn, v_en, v_ws, v_es;
    v_wn.mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    v_en.mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    v_ws.mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    v_es.mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    GetGridPointValue<T>(*input, &v_wn, x_w, y_n);
    GetGridPointValue<T>(*input, &v_en, x_e, y_n);
    GetGridPointValue<T>(*input, &v_ws, x_w, y_s);
    GetGridPointValue<T>(*input, &v_es, x_e, y_s);

    auto d_w_t = EigenTensor<T, 3>::From(d_w);
    auto d_e_t = EigenTensor<T, 3>::From(d_e);
    auto d_n_t = EigenTensor<T, 3>::From(d_n);
    auto d_s_t = EigenTensor<T, 3>::From(d_s);
    auto d_w_scaled_t =
        d_w_t.reshape(Array4(n, 1, h, w)).broadcast(Array4(1, c, 1, 1));
    auto d_e_scaled_t =
        d_e_t.reshape(Array4(n, 1, h, w)).broadcast(Array4(1, c, 1, 1));
    auto d_n_scaled_t =
        d_n_t.reshape(Array4(n, 1, h, w)).broadcast(Array4(1, c, 1, 1));
    auto d_s_scaled_t =
        d_s_t.reshape(Array4(n, 1, h, w)).broadcast(Array4(1, c, 1, 1));
    auto v_wn_t = EigenTensor<T, 4>::From(v_wn);
    auto v_en_t = EigenTensor<T, 4>::From(v_en);
    auto v_ws_t = EigenTensor<T, 4>::From(v_ws);
    auto v_es_t = EigenTensor<T, 4>::From(v_es);
    auto output_t = EigenTensor<T, 4>::From(*output);
    // bilinear interpolation by 4 corner points
    output_t.device(place) = v_wn_t * d_e_scaled_t * d_s_scaled_t +
                             v_en_t * d_w_scaled_t * d_s_scaled_t +
                             v_ws_t * d_e_scaled_t * d_n_scaled_t +
                             v_es_t * d_w_scaled_t * d_n_scaled_t;
  }
};

template <typename DeviceContext, typename T>
class GridSampleGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("X");
    auto* grid = ctx.Input<Tensor>("Grid");
    auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));

    const int n = input->dims()[0];
    const int c = input->dims()[1];
    const int h = input->dims()[2];
    const int w = input->dims()[3];

    auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
    input_grad->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    math::SetConstant<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(), input_grad,
        static_cast<T>(0));
    auto* grid_grad = ctx.Output<Tensor>(framework::GradVarName("Grid"));
    grid_grad->mutable_data<T>({n, h, w, 2}, ctx.GetPlace());
    math::SetConstant<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(), grid_grad,
        static_cast<T>(0));

    Tensor x_w, x_e, y_n, y_s;
    Tensor d_w, d_e, d_n, d_s;
    CalcGridLocations<T>(
        ctx.template device_context<platform::CPUDeviceContext>(), *grid, &x_w,
        &x_e, &y_n, &y_s, &d_w, &d_e, &d_n, &d_s);

    // gather output grad value to input grad by corner point coords and weight
    GatherOutputGradToInputGrad<T>(*output_grad, input_grad, x_w, y_n, d_e,
                                   d_s);
    GatherOutputGradToInputGrad<T>(*output_grad, input_grad, x_w, y_s, d_e,
                                   d_n);
    GatherOutputGradToInputGrad<T>(*output_grad, input_grad, x_e, y_n, d_w,
                                   d_s);
    GatherOutputGradToInputGrad<T>(*output_grad, input_grad, x_e, y_s, d_w,
                                   d_n);

    // calc 4 corner points value
    Tensor v_wn, v_en, v_ws, v_es;
    v_wn.mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    v_en.mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    v_ws.mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    v_es.mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    GetGridPointValue<T>(*input, &v_wn, x_w, y_n);
    GetGridPointValue<T>(*input, &v_en, x_e, y_n);
    GetGridPointValue<T>(*input, &v_ws, x_w, y_s);
    GetGridPointValue<T>(*input, &v_es, x_e, y_s);
    auto v_wn_t = EigenTensor<T, 4>::From(v_wn);
    auto v_en_t = EigenTensor<T, 4>::From(v_en);
    auto v_ws_t = EigenTensor<T, 4>::From(v_ws);
    auto v_es_t = EigenTensor<T, 4>::From(v_es);

    auto d_w_t = EigenTensor<T, 3>::From(d_w);
    auto d_e_t = EigenTensor<T, 3>::From(d_e);
    auto d_n_t = EigenTensor<T, 3>::From(d_n);
    auto d_s_t = EigenTensor<T, 3>::From(d_s);

    auto output_grad_t = EigenTensor<T, 4>::From(*output_grad);

    Tensor grid_grad_x, grid_grad_y;
    grid_grad_x.mutable_data<T>({n, h, w}, ctx.GetPlace());
    grid_grad_y.mutable_data<T>({n, h, w}, ctx.GetPlace());
    auto grid_grad_x_t = EigenTensor<T, 3>::From(grid_grad_x).setConstant(0.0);
    auto grid_grad_y_t = EigenTensor<T, 3>::From(grid_grad_y).setConstant(0.0);
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < c; j++) {
        for (int k = 0; k < h; k++) {
          for (int l = 0; l < w; l++) {
            grid_grad_x_t(i, k, l) +=
                ((v_en_t(i, j, k, l) - v_wn_t(i, j, k, l)) * d_s_t(i, k, l) +
                 (v_es_t(i, j, k, l) - v_ws_t(i, j, k, l)) * d_n_t(i, k, l)) *
                output_grad_t(i, j, k, l);
            grid_grad_y_t(i, k, l) +=
                ((v_ws_t(i, j, k, l) - v_wn_t(i, j, k, l)) * d_e_t(i, k, l) +
                 (v_es_t(i, j, k, l) - v_en_t(i, j, k, l)) * d_w_t(i, k, l)) *
                output_grad_t(i, j, k, l);
          }
        }
      }
    }
    const T x_max = static_cast<T>(w - 1);
    const T y_max = static_cast<T>(h - 1);
    grid_grad_x_t = grid_grad_x_t * (x_max / (T)2);
    grid_grad_y_t = grid_grad_y_t * (y_max / (T)2);

    // gather grid_grad [x, y] in 3rd Dim
    T* grid_grad_data = grid_grad->data<T>();
    T* grid_grad_x_data = grid_grad_x.data<T>();
    T* grid_grad_y_data = grid_grad_y.data<T>();
    for (int i = 0; i < n * h * w; i++) {
      grid_grad_data[2 * i] = grid_grad_x_data[i];
      grid_grad_data[2 * i + 1] = grid_grad_y_data[i];
    }
  }
};

}  // namespace operators
}  // namespace paddle
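The CPU path above factors the computation into CalcGridLocations (corner coordinates and distances), GetGridPointValue (gathering the four corner values) and an Eigen broadcast blend. A compact NumPy sketch of the first step, assuming the same [-1, 1] grid normalization (the function name is illustrative):

    import numpy as np

    def calc_grid_locations(grid):
        """grid: [N, H, W, 2] in [-1, 1]; returns corner coords and distances."""
        h, w = grid.shape[1:3]
        x = (grid[..., 0] + 1.0) * (w - 1) / 2.0  # scale to [0, W-1]
        y = (grid[..., 1] + 1.0) * (h - 1) / 2.0  # scale to [0, H-1]
        x_w, y_n = np.floor(x), np.floor(y)       # west column, north row
        x_e, y_s = x_w + 1, y_n + 1               # east column, south row
        d_w, d_e = x - x_w, x_e - x               # distances to west/east corners
        d_n, d_s = y - y_n, y_s - y               # distances to north/south corners
        return x_w, x_e, y_n, y_s, d_w, d_e, d_n, d_s

Note that the grad kernel's final multiplication by (W-1)/2 and (H-1)/2 is exactly the chain-rule factor of this rescaling from normalized to pixel coordinates.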
@@ -342,7 +342,7 @@ class ScopedPoolingDescriptor {
};

class ScopedSpatialTransformerDescriptor {
 public:
  ScopedSpatialTransformerDescriptor() {
    PADDLE_ENFORCE(dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
  }
@@ -354,13 +354,13 @@ class ScopedSpatialTransformerDescriptor {
  inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims,
                                                        const int dimA[]) {
    PADDLE_ENFORCE(dynload::cudnnSetSpatialTransformerNdDescriptor(
        desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
    return desc_;
  }

 private:
  cudnnSpatialTransformerDescriptor_t desc_;
  DISABLE_COPY_AND_ASSIGN(ScopedSpatialTransformerDescriptor);
};

inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
...
@@ -65,51 +65,51 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
 * include all needed cudnn functions in HPPL
 * different cudnn version has different interfaces
 **/
#define CUDNN_DNN_ROUTINE_EACH(__macro)                   \
  __macro(cudnnSetTensor4dDescriptor);                    \
  __macro(cudnnSetTensor4dDescriptorEx);                  \
  __macro(cudnnSetTensorNdDescriptor);                    \
  __macro(cudnnGetTensorNdDescriptor);                    \
  __macro(cudnnGetConvolutionNdForwardOutputDim);         \
  __macro(cudnnGetConvolutionForwardAlgorithm);           \
  __macro(cudnnCreateTensorDescriptor);                   \
  __macro(cudnnDestroyTensorDescriptor);                  \
  __macro(cudnnCreateFilterDescriptor);                   \
  __macro(cudnnSetFilter4dDescriptor);                    \
  __macro(cudnnSetFilterNdDescriptor);                    \
  __macro(cudnnGetFilterNdDescriptor);                    \
  __macro(cudnnSetPooling2dDescriptor);                   \
  __macro(cudnnSetPoolingNdDescriptor);                   \
  __macro(cudnnGetPoolingNdDescriptor);                   \
  __macro(cudnnDestroyFilterDescriptor);                  \
  __macro(cudnnCreateConvolutionDescriptor);              \
  __macro(cudnnCreatePoolingDescriptor);                  \
  __macro(cudnnDestroyPoolingDescriptor);                 \
  __macro(cudnnSetConvolution2dDescriptor);               \
  __macro(cudnnDestroyConvolutionDescriptor);             \
  __macro(cudnnSetConvolutionNdDescriptor);               \
  __macro(cudnnGetConvolutionNdDescriptor);               \
  __macro(cudnnDeriveBNTensorDescriptor);                 \
  __macro(cudnnCreateSpatialTransformerDescriptor);       \
  __macro(cudnnSetSpatialTransformerNdDescriptor);        \
  __macro(cudnnDestroySpatialTransformerDescriptor);      \
  __macro(cudnnSpatialTfGridGeneratorForward);            \
  __macro(cudnnSpatialTfGridGeneratorBackward);           \
  __macro(cudnnSpatialTfSamplerForward);                  \
  __macro(cudnnSpatialTfSamplerBackward);                 \
  __macro(cudnnCreate);                                   \
  __macro(cudnnDestroy);                                  \
  __macro(cudnnSetStream);                                \
  __macro(cudnnActivationForward);                        \
  __macro(cudnnConvolutionForward);                       \
  __macro(cudnnConvolutionBackwardBias);                  \
  __macro(cudnnGetConvolutionForwardWorkspaceSize);       \
  __macro(cudnnTransformTensor);                          \
  __macro(cudnnPoolingForward);                           \
  __macro(cudnnPoolingBackward);                          \
  __macro(cudnnSoftmaxBackward);                          \
  __macro(cudnnSoftmaxForward);                           \
  __macro(cudnnGetVersion);                               \
  __macro(cudnnGetErrorString);
CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
...
@@ -7586,11 +7586,13 @@ def hash(input, hash_size, num_hash=1, name=None):

@templatedoc()
def grid_sampler(x, grid, name=None):
    """
    This operation samples input X by using bilinear interpolation based on
    flow field grid, which is usually generated by affine_grid. The grid of
    shape [N, H, W, 2] is the concatenation of (grid_x, grid_y) coordinates
    with shape [N, H, W] each, where grid_x indexes the 4th dimension
    (width dimension) of input data x and grid_y indexes the 3rd dimension
    (height dimension); the final result is the bilinear interpolation value
    of the 4 nearest corner points.

    Step 1:
    Get (x, y) grid coordinates and scale to [0, H-1/W-1].
@@ -7636,7 +7638,16 @@ def grid_sampler(x, grid, name=None):
        name (str, default None): The name of this layer.

    Returns:
        out(Variable): Output tensor of shape [N, C, H, W], the result of
                       sampling input X with bilinear interpolation based on
                       the input grid.

    Examples:
        .. code-block:: python

            x = fluid.layers.data(name='x', shape=[3, 10, 32, 32], dtype='float32')
            theta = fluid.layers.data(name='theta', shape=[3, 2, 3], dtype='float32')
            grid = fluid.layers.affine_grid(input=theta, size=[3, 10, 32, 32])
            out = fluid.layers.grid_sampler(x=x, grid=grid)
    """
    helper = LayerHelper("grid_sampler", **locals())
@@ -7649,10 +7660,6 @@ def grid_sampler(x, grid, name=None):
    out = helper.create_tmp_variable(x.dtype)
    ipts = {'X': x, 'Grid': grid}
    helper.append_op(type='grid_sampler', inputs=ipts, outputs={'Output': out})

    return out
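Building on the docstring example, a fuller end-to-end run might look like the sketch below. The feed shapes and random data are illustrative, affine_grid is assumed to be available as referenced in the docstring, and append_batch_size=False is assumed so the declared shapes include the batch dimension:

    import numpy as np
    import paddle.fluid as fluid

    x = fluid.layers.data(
        name='x', shape=[3, 10, 32, 32], dtype='float32', append_batch_size=False)
    theta = fluid.layers.data(
        name='theta', shape=[3, 2, 3], dtype='float32', append_batch_size=False)
    grid = fluid.layers.affine_grid(input=theta, size=[3, 10, 32, 32])
    out = fluid.layers.grid_sampler(x=x, grid=grid)

    exe = fluid.Executor(fluid.CPUPlace())
    result, = exe.run(
        fluid.default_main_program(),
        feed={
            'x': np.random.rand(3, 10, 32, 32).astype('float32'),
            'theta': np.random.rand(3, 2, 3).astype('float32'),
        },
        fetch_list=[out])
    print(result.shape)  # (3, 10, 32, 32)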
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
from op_test import OpTest
@@ -23,11 +22,11 @@ def AffineGrid(theta, size):
    h = size[2]
    w = size[3]
    h_idx = np.repeat(
        np.linspace(-1, 1, h)[np.newaxis, :], w, axis=0).T[:, :, np.newaxis]
    w_idx = np.repeat(
        np.linspace(-1, 1, w)[np.newaxis, :], h, axis=0)[:, :, np.newaxis]
    grid = np.concatenate(
        [w_idx, h_idx, np.ones([h, w, 1])], axis=2)  # h * w * 3
    grid = np.repeat(grid[np.newaxis, :], size[0], axis=0)  # n * h * w * 3

    ret = np.zeros([n, h * w, 2])
@@ -37,6 +36,7 @@ def AffineGrid(theta, size):
    return ret.reshape([n, h, w, 2]).astype("float32")


def getGridPointValue(data, x, y):
    data_shape = data.shape
    N = data_shape[0]
@@ -47,13 +47,15 @@ def getGridPointValue(data, x, y):
    for i in range(N):
        for j in range(H):
            for k in range(W):
                if y[i, j, k] < 0 or y[i, j, k] > H - 1 or x[
                        i, j, k] < 0 or x[i, j, k] > W - 1:
                    out[i, :, j, k] = 0
                else:
                    out[i, :, j, k] = data[i, :, y[i, j, k], x[i, j, k]]
    return out


def GridSampler(data, grid):
    dims = data.shape
    N = dims[0]
@@ -71,7 +73,7 @@ def GridSampler(data, grid):
    x0 = np.floor(x).astype('int32')
    x1 = x0 + 1
    y0 = np.floor(y).astype('int32')
    y1 = y0 + 1

    wa = np.tile(((x1 - x) * (y1 - y)).reshape((N, 1, H, W)), (1, C, 1, 1))
@@ -87,6 +89,7 @@ def GridSampler(data, grid):
    out = (wa * va + wb * vb + wc * vc + wd * vd).astype('float32')
    return out


class TestGridSamplerOp(OpTest):
    def setUp(self):
        self.initTestCase()
@@ -115,5 +118,6 @@ class TestGridSamplerOp(OpTest):
        self.grid_shape = (2, 7, 3, 2)
        self.theta_shape = (2, 2, 3)


if __name__ == "__main__":
    unittest.main()
@@ -868,13 +868,12 @@ class TestBook(unittest.TestCase):
    def test_affine_grid_gen(self):
        program = Program()
        with program_guard(program):
            x = layers.data(name='x', shape=[2, 5, 7, 3], dtype='float32')
            grid = layers.data(name='grid', shape=[2, 5, 7, 2], dtype='float32')
            out = layers.grid_sampler(x, grid)
            self.assertIsNotNone(out)
        print(str(program))


if __name__ == '__main__':
    unittest.main()