// Review notes for the patch below. Everything before the first "diff --git"
// header is commentary only; the patch text itself is unchanged.
//
// Purpose: the grid-construction code that was duplicated in
// AffineGridOpKernel::Compute and AffineGridGradOpKernel::Compute is removed
// from both and replaced by a call to a new header-level helper, GetIdxMap.
//
// Changes visible in the hunks:
//  * affine_grid_op.cc and affine_grid_op.h: Linspace::operator() no longer
//    returns a framework::Tensor. It now returns void and fills a
//    caller-supplied framework::Tensor* numbers, which the .cc definition
//    allocates as {count} on platform::CPUPlace().
//  * affine_grid_op.h: new inline GetIdxMap(n, h, w, grid, ctx). It allocates
//    grid as {n, h, w, 3} on ctx.GetPlace(), fills h_idx and w_idx through
//    Linspace over the range (T)-1 .. (T)1, creates a {h, w, 1} tensor set to
//    the constant 1, and assigns grid from the concatenation of the width
//    map, the height map and the ones tensor, reshaped to (1, h, w, 3) and
//    broadcast n times along the first dimension.
//  * The former single chained Eigen expression (which used .eval()) is split
//    into separate device assignments to intermediate tensors: w_idx_map and
//    h_idx_map ({h, w, 1}), w_h_idx_map ({h, w, 2}) and w_h_one_idx_map
//    ({h, w, 3}).
//  * Both Compute methods drop their local `place` variable and now call
//    GetIdxMap(n, h, w, &grid, ctx).
//
// NOTE(review): Linspace computes slice = (end - start) / (T)(count - 1).
//   With count == 1 the divisor is zero. Nothing in these hunks guards
//   h == 1 or w == 1 -- confirm against the output_shape validation done
//   elsewhere.
// NOTE(review): the comments "Get indexes of height/width with shape
//   [height, width, 1]" describe the tensors only after the later
//   reshape/broadcast; Linspace itself allocates {count} elements.
// NOTE(review): template argument lists look stripped in this copy (for
//   example "template struct Linspace", "Eigen::DSizes;", "static_cast(0)")
//   and line breaks are collapsed, presumably lost in extraction -- verify
//   against the original commit before trying to apply this text as a patch.
diff --git a/paddle/fluid/operators/affine_grid_op.cc b/paddle/fluid/operators/affine_grid_op.cc index 0ea28265a245c9cd1a35a79324a33f7cf208a159..6f7da445fc84fc1f14b01a633af0e886aec6f8ed 100644 --- a/paddle/fluid/operators/affine_grid_op.cc +++ b/paddle/fluid/operators/affine_grid_op.cc @@ -26,15 +26,13 @@ using Tensor = framework::Tensor; template struct Linspace { - framework::Tensor operator()(T start, T end, int count, - const framework::ExecutionContext& ctx) { - Tensor numbers; - T* number_data = numbers.mutable_data({count}, platform::CPUPlace()); + void operator()(T start, T end, int count, framework::Tensor* numbers, + const framework::ExecutionContext& ctx) { + T* number_data = numbers->mutable_data({count}, platform::CPUPlace()); T slice = (end - start) / (T)(count - 1); for (int i = 0; i < count; ++i) { number_data[i] = start + (T)i * slice; } - return numbers; } }; diff --git a/paddle/fluid/operators/affine_grid_op.h b/paddle/fluid/operators/affine_grid_op.h index 07e26c292c3bafc4d98bd392a9e1e21a9eb383a8..87d23831486e658374d4c011412fdef57be1b994 100644 --- a/paddle/fluid/operators/affine_grid_op.h +++ b/paddle/fluid/operators/affine_grid_op.h @@ -37,18 +37,65 @@ using Array4 = Eigen::DSizes; */ template struct Linspace { - framework::Tensor operator()(T start, T end, int count, - const framework::ExecutionContext& ctx); + void operator()(T start, T end, int count, framework::Tensor* numbers, + const framework::ExecutionContext& ctx); }; +template +inline void GetIdxMap(int n, int h, int w, Tensor* grid, + const framework::ExecutionContext& ctx) { + auto& place = *ctx.template device_context().eigen_device(); + grid->mutable_data({n, h, w, 3}, ctx.GetPlace()); + auto grid_t = EigenTensor::From(*grid); + // Get indexes of height with shape [height, width, 1] + Tensor h_idx; + Linspace linspace; + linspace((T)-1, (T)1, h, &h_idx, ctx); + auto h_idx_t = EigenTensor::From(h_idx); + // Get indexes of width with shape [height, width, 1] + Tensor w_idx; + 
linspace((T)-1, (T)1, w, &w_idx, ctx); + auto w_idx_t = EigenTensor::From(w_idx); + // Get constant ones tensor with shape [height, width, 1] + Tensor ones; + ones.mutable_data({h, w, 1}, ctx.GetPlace()); + auto ones_t = EigenTensor::From(ones).setConstant((T)1); + // Get grid tensor with shape [n, h, w, 3] by concatenating h_idx, w_idx and + // ones + Tensor w_idx_map; + w_idx_map.mutable_data({h, w, 1}, ctx.GetPlace()); + auto w_idx_map_t = EigenTensor::From(w_idx_map); + Tensor h_idx_map; + h_idx_map.mutable_data({h, w, 1}, ctx.GetPlace()); + auto h_idx_map_t = EigenTensor::From(h_idx_map); + Tensor w_h_idx_map; + w_h_idx_map.mutable_data({h, w, 2}, ctx.GetPlace()); + auto w_h_idx_map_t = EigenTensor::From(w_h_idx_map); + Tensor w_h_one_idx_map; + w_h_one_idx_map.mutable_data({h, w, 3}, ctx.GetPlace()); + auto w_h_one_idx_map_t = EigenTensor::From(w_h_one_idx_map); + + w_idx_map_t.device(place) = w_idx_t.reshape(Array2(1, w)) + .broadcast(Array2(h, 1)) + .reshape(Array3(h, w, 1)); + + h_idx_map_t.device(place) = h_idx_t.reshape(Array2(1, h)) + .broadcast(Array2(w, 1)) + .shuffle(Array2(1, 0)) + .reshape(Array3(h, w, 1)); + + w_h_idx_map_t.device(place) = w_idx_map_t.concatenate(h_idx_map_t, 2); + w_h_one_idx_map_t.device(place) = w_h_idx_map_t.concatenate(ones_t, 2); + grid_t.device(place) = w_h_one_idx_map_t.reshape(Array4(1, h, w, 3)) + .broadcast(Array4(n, 1, 1, 1)); +} + template class AffineGridOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto& place = *ctx.template device_context().eigen_device(); auto* theta = ctx.Input("Theta"); int n = theta->dims()[0]; - auto size_attr = ctx.Attr>("output_shape"); int h = 0; int w = 0; @@ -63,44 +110,13 @@ class AffineGridOpKernel : public framework::OpKernel { h = size_attr[2]; w = size_attr[3]; } - auto* output = ctx.Output("Output"); output->mutable_data({n, h, w, 2}, ctx.GetPlace()); - math::SetConstant()( ctx.template device_context(), 
output, static_cast(0)); - - Linspace linspace; - // Get indexes of height with shape [height, width, 1] - auto h_idx = linspace((T)-1, (T)1, h, ctx); - auto h_idx_t = EigenTensor::From(h_idx); - // Get indexes of width with shape [height, width, 1] - auto w_idx = linspace((T)-1, (T)1, w, ctx); - auto w_idx_t = EigenTensor::From(w_idx); - // Get constant ones tensor with shape [height, width, 1] - Tensor ones; - ones.mutable_data({h, w, 1}, ctx.GetPlace()); - auto ones_t = EigenTensor::From(ones).setConstant((T)1); - // Get grid tensor with shape [n, h, w, 3] by concatenating h_idx, w_idx and - // ones Tensor grid; - grid.mutable_data({n, h, w, 3}, ctx.GetPlace()); - auto grid_t = EigenTensor::From(grid); - - grid_t.device(place) = w_idx_t.reshape(Array2(1, w)) - .broadcast(Array2(h, 1)) - .reshape(Array3(h, w, 1)) - .concatenate(h_idx_t.reshape(Array2(1, h)) - .broadcast(Array2(w, 1)) - .shuffle(Array2(1, 0)) - .reshape(Array3(h, w, 1)), - 2) - .eval() - .concatenate(ones_t, 2) - .reshape(Array4(1, h, w, 3)) - .broadcast(Array4(n, 1, 1, 1)); - + GetIdxMap(n, h, w, &grid, ctx); // output = grid * theta.T // TODO(wanghaoshuang): Refine batched matrix multiply auto blas = math::GetBlas(ctx); @@ -118,10 +134,8 @@ template class AffineGridGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto& place = *ctx.template device_context().eigen_device(); auto output_grad = ctx.Input(framework::GradVarName("Output")); auto theta_grad = ctx.Output(framework::GradVarName("Theta")); - int n = output_grad->dims()[0]; auto size_attr = ctx.Attr>("output_shape"); int h = 0; @@ -137,42 +151,12 @@ class AffineGridGradOpKernel : public framework::OpKernel { h = size_attr[2]; w = size_attr[3]; } - theta_grad->mutable_data({n, 2, 3}, ctx.GetPlace()); - math::SetConstant()( ctx.template device_context(), theta_grad, static_cast(0)); - - Linspace linspace; - - // Get indexes of height with shape [height, width, 1] - 
auto h_idx = linspace((T)-1, (T)1, h, ctx); - auto h_idx_t = EigenTensor::From(h_idx); - // Get indexes of width with shape [height, width, 1] - auto w_idx = linspace((T)-1, (T)1, w, ctx); - auto w_idx_t = EigenTensor::From(w_idx); - // Get constant ones tensor with shape [height, width, 1] - Tensor ones; - ones.mutable_data({h, w, 1}, ctx.GetPlace()); - auto ones_t = EigenTensor::From(ones).setConstant((T)1); - // Get grid tensor with shape [n, h, w, 3] by concatenating h_idx, w_idx and - // ones Tensor grid; - grid.mutable_data({n, h, w, 3}, ctx.GetPlace()); - auto grid_t = EigenTensor::From(grid); - grid_t.device(place) = w_idx_t.reshape(Array2(1, w)) - .broadcast(Array2(h, 1)) - .reshape(Array3(h, w, 1)) - .concatenate(h_idx_t.reshape(Array2(1, h)) - .broadcast(Array2(w, 1)) - .shuffle(Array2(1, 0)) - .reshape(Array3(h, w, 1)), - 2) - .eval() - .concatenate(ones_t, 2) - .reshape(Array4(1, h, w, 3)) - .broadcast(Array4(n, 1, 1, 1)); + GetIdxMap(n, h, w, &grid, ctx); // output = grid * theta.T // TODO(wanghaoshuang): Refine batched matrix multiply auto blas = math::GetBlas(ctx);